2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
28 #include "block_int.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
33 #include "qemu-timer.h"
36 #include <sys/types.h>
38 #include <sys/ioctl.h>
39 #include <sys/queue.h>
49 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
52 BDRV_REQ_COPY_ON_READ
= 0x1,
53 BDRV_REQ_ZERO_WRITE
= 0x2,
56 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
);
57 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
58 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
59 BlockDriverCompletionFunc
*cb
, void *opaque
);
60 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
61 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
62 BlockDriverCompletionFunc
*cb
, void *opaque
);
63 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
64 int64_t sector_num
, int nb_sectors
,
66 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
67 int64_t sector_num
, int nb_sectors
,
69 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
70 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
71 BdrvRequestFlags flags
);
72 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
73 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
74 BdrvRequestFlags flags
);
75 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
79 BlockDriverCompletionFunc
*cb
,
82 static void coroutine_fn
bdrv_co_do_rw(void *opaque
);
83 static int coroutine_fn
bdrv_co_do_write_zeroes(BlockDriverState
*bs
,
84 int64_t sector_num
, int nb_sectors
);
86 static bool bdrv_exceed_bps_limits(BlockDriverState
*bs
, int nb_sectors
,
87 bool is_write
, double elapsed_time
, uint64_t *wait
);
88 static bool bdrv_exceed_iops_limits(BlockDriverState
*bs
, bool is_write
,
89 double elapsed_time
, uint64_t *wait
);
90 static bool bdrv_exceed_io_limits(BlockDriverState
*bs
, int nb_sectors
,
91 bool is_write
, int64_t *wait
);
93 static QTAILQ_HEAD(, BlockDriverState
) bdrv_states
=
94 QTAILQ_HEAD_INITIALIZER(bdrv_states
);
96 static QLIST_HEAD(, BlockDriver
) bdrv_drivers
=
97 QLIST_HEAD_INITIALIZER(bdrv_drivers
);
99 /* The device to use for VM snapshots */
100 static BlockDriverState
*bs_snapshots
;
102 /* If non-zero, use only whitelisted block drivers */
103 static int use_bdrv_whitelist
;
106 static int is_windows_drive_prefix(const char *filename
)
108 return (((filename
[0] >= 'a' && filename
[0] <= 'z') ||
109 (filename
[0] >= 'A' && filename
[0] <= 'Z')) &&
113 int is_windows_drive(const char *filename
)
115 if (is_windows_drive_prefix(filename
) &&
118 if (strstart(filename
, "\\\\.\\", NULL
) ||
119 strstart(filename
, "//./", NULL
))
125 /* throttling disk I/O limits */
126 void bdrv_io_limits_disable(BlockDriverState
*bs
)
128 bs
->io_limits_enabled
= false;
130 while (qemu_co_queue_next(&bs
->throttled_reqs
));
132 if (bs
->block_timer
) {
133 qemu_del_timer(bs
->block_timer
);
134 qemu_free_timer(bs
->block_timer
);
135 bs
->block_timer
= NULL
;
141 memset(&bs
->io_base
, 0, sizeof(bs
->io_base
));
144 static void bdrv_block_timer(void *opaque
)
146 BlockDriverState
*bs
= opaque
;
148 qemu_co_queue_next(&bs
->throttled_reqs
);
151 void bdrv_io_limits_enable(BlockDriverState
*bs
)
153 qemu_co_queue_init(&bs
->throttled_reqs
);
154 bs
->block_timer
= qemu_new_timer_ns(vm_clock
, bdrv_block_timer
, bs
);
155 bs
->slice_time
= 5 * BLOCK_IO_SLICE_TIME
;
156 bs
->slice_start
= qemu_get_clock_ns(vm_clock
);
157 bs
->slice_end
= bs
->slice_start
+ bs
->slice_time
;
158 memset(&bs
->io_base
, 0, sizeof(bs
->io_base
));
159 bs
->io_limits_enabled
= true;
162 bool bdrv_io_limits_enabled(BlockDriverState
*bs
)
164 BlockIOLimit
*io_limits
= &bs
->io_limits
;
165 return io_limits
->bps
[BLOCK_IO_LIMIT_READ
]
166 || io_limits
->bps
[BLOCK_IO_LIMIT_WRITE
]
167 || io_limits
->bps
[BLOCK_IO_LIMIT_TOTAL
]
168 || io_limits
->iops
[BLOCK_IO_LIMIT_READ
]
169 || io_limits
->iops
[BLOCK_IO_LIMIT_WRITE
]
170 || io_limits
->iops
[BLOCK_IO_LIMIT_TOTAL
];
173 static void bdrv_io_limits_intercept(BlockDriverState
*bs
,
174 bool is_write
, int nb_sectors
)
176 int64_t wait_time
= -1;
178 if (!qemu_co_queue_empty(&bs
->throttled_reqs
)) {
179 qemu_co_queue_wait(&bs
->throttled_reqs
);
182 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
183 * throttled requests will not be dequeued until the current request is
184 * allowed to be serviced. So if the current request still exceeds the
185 * limits, it will be inserted at the head. All requests following it will
186 * still be in the throttled_reqs queue.
189 while (bdrv_exceed_io_limits(bs
, nb_sectors
, is_write
, &wait_time
)) {
190 qemu_mod_timer(bs
->block_timer
,
191 wait_time
+ qemu_get_clock_ns(vm_clock
));
192 qemu_co_queue_wait_insert_head(&bs
->throttled_reqs
);
195 qemu_co_queue_next(&bs
->throttled_reqs
);
198 /* check if the path starts with "<protocol>:" */
199 static int path_has_protocol(const char *path
)
204 if (is_windows_drive(path
) ||
205 is_windows_drive_prefix(path
)) {
208 p
= path
+ strcspn(path
, ":/\\");
210 p
= path
+ strcspn(path
, ":/");
216 int path_is_absolute(const char *path
)
219 /* specific case for names like: "\\.\d:" */
220 if (is_windows_drive(path
) || is_windows_drive_prefix(path
)) {
223 return (*path
== '/' || *path
== '\\');
225 return (*path
== '/');
229 /* if filename is absolute, just copy it to dest. Otherwise, build a
230 path to it by considering it is relative to base_path. URL are
232 void path_combine(char *dest
, int dest_size
,
233 const char *base_path
,
234 const char *filename
)
241 if (path_is_absolute(filename
)) {
242 pstrcpy(dest
, dest_size
, filename
);
244 p
= strchr(base_path
, ':');
249 p1
= strrchr(base_path
, '/');
253 p2
= strrchr(base_path
, '\\');
265 if (len
> dest_size
- 1)
267 memcpy(dest
, base_path
, len
);
269 pstrcat(dest
, dest_size
, filename
);
273 void bdrv_get_full_backing_filename(BlockDriverState
*bs
, char *dest
, size_t sz
)
275 if (bs
->backing_file
[0] == '\0' || path_has_protocol(bs
->backing_file
)) {
276 pstrcpy(dest
, sz
, bs
->backing_file
);
278 path_combine(dest
, sz
, bs
->filename
, bs
->backing_file
);
282 void bdrv_register(BlockDriver
*bdrv
)
284 /* Block drivers without coroutine functions need emulation */
285 if (!bdrv
->bdrv_co_readv
) {
286 bdrv
->bdrv_co_readv
= bdrv_co_readv_em
;
287 bdrv
->bdrv_co_writev
= bdrv_co_writev_em
;
289 /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
290 * the block driver lacks aio we need to emulate that too.
292 if (!bdrv
->bdrv_aio_readv
) {
293 /* add AIO emulation layer */
294 bdrv
->bdrv_aio_readv
= bdrv_aio_readv_em
;
295 bdrv
->bdrv_aio_writev
= bdrv_aio_writev_em
;
299 QLIST_INSERT_HEAD(&bdrv_drivers
, bdrv
, list
);
302 /* create a new block device (by default it is empty) */
303 BlockDriverState
*bdrv_new(const char *device_name
)
305 BlockDriverState
*bs
;
307 bs
= g_malloc0(sizeof(BlockDriverState
));
308 pstrcpy(bs
->device_name
, sizeof(bs
->device_name
), device_name
);
309 if (device_name
[0] != '\0') {
310 QTAILQ_INSERT_TAIL(&bdrv_states
, bs
, list
);
312 bdrv_iostatus_disable(bs
);
316 BlockDriver
*bdrv_find_format(const char *format_name
)
319 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
320 if (!strcmp(drv1
->format_name
, format_name
)) {
327 static int bdrv_is_whitelisted(BlockDriver
*drv
)
329 static const char *whitelist
[] = {
330 CONFIG_BDRV_WHITELIST
335 return 1; /* no whitelist, anything goes */
337 for (p
= whitelist
; *p
; p
++) {
338 if (!strcmp(drv
->format_name
, *p
)) {
345 BlockDriver
*bdrv_find_whitelisted_format(const char *format_name
)
347 BlockDriver
*drv
= bdrv_find_format(format_name
);
348 return drv
&& bdrv_is_whitelisted(drv
) ? drv
: NULL
;
351 typedef struct CreateCo
{
354 QEMUOptionParameter
*options
;
358 static void coroutine_fn
bdrv_create_co_entry(void *opaque
)
360 CreateCo
*cco
= opaque
;
363 cco
->ret
= cco
->drv
->bdrv_create(cco
->filename
, cco
->options
);
366 int bdrv_create(BlockDriver
*drv
, const char* filename
,
367 QEMUOptionParameter
*options
)
374 .filename
= g_strdup(filename
),
379 if (!drv
->bdrv_create
) {
383 if (qemu_in_coroutine()) {
384 /* Fast-path if already in coroutine context */
385 bdrv_create_co_entry(&cco
);
387 co
= qemu_coroutine_create(bdrv_create_co_entry
);
388 qemu_coroutine_enter(co
, &cco
);
389 while (cco
.ret
== NOT_DONE
) {
395 g_free(cco
.filename
);
400 int bdrv_create_file(const char* filename
, QEMUOptionParameter
*options
)
404 drv
= bdrv_find_protocol(filename
);
409 return bdrv_create(drv
, filename
, options
);
413 * Create a uniquely-named empty temporary file.
414 * Return 0 upon success, otherwise a negative errno value.
416 int get_tmp_filename(char *filename
, int size
)
419 char temp_dir
[MAX_PATH
];
420 /* GetTempFileName requires that its output buffer (4th param)
421 have length MAX_PATH or greater. */
422 assert(size
>= MAX_PATH
);
423 return (GetTempPath(MAX_PATH
, temp_dir
)
424 && GetTempFileName(temp_dir
, "qem", 0, filename
)
425 ? 0 : -GetLastError());
429 tmpdir
= getenv("TMPDIR");
432 if (snprintf(filename
, size
, "%s/vl.XXXXXX", tmpdir
) >= size
) {
435 fd
= mkstemp(filename
);
439 if (close(fd
) != 0) {
448 * Detect host devices. By convention, /dev/cdrom[N] is always
449 * recognized as a host CDROM.
451 static BlockDriver
*find_hdev_driver(const char *filename
)
453 int score_max
= 0, score
;
454 BlockDriver
*drv
= NULL
, *d
;
456 QLIST_FOREACH(d
, &bdrv_drivers
, list
) {
457 if (d
->bdrv_probe_device
) {
458 score
= d
->bdrv_probe_device(filename
);
459 if (score
> score_max
) {
469 BlockDriver
*bdrv_find_protocol(const char *filename
)
476 /* TODO Drivers without bdrv_file_open must be specified explicitly */
479 * XXX(hch): we really should not let host device detection
480 * override an explicit protocol specification, but moving this
481 * later breaks access to device names with colons in them.
482 * Thanks to the brain-dead persistent naming schemes on udev-
483 * based Linux systems those actually are quite common.
485 drv1
= find_hdev_driver(filename
);
490 if (!path_has_protocol(filename
)) {
491 return bdrv_find_format("file");
493 p
= strchr(filename
, ':');
496 if (len
> sizeof(protocol
) - 1)
497 len
= sizeof(protocol
) - 1;
498 memcpy(protocol
, filename
, len
);
499 protocol
[len
] = '\0';
500 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
501 if (drv1
->protocol_name
&&
502 !strcmp(drv1
->protocol_name
, protocol
)) {
509 static int find_image_format(const char *filename
, BlockDriver
**pdrv
)
511 int ret
, score
, score_max
;
512 BlockDriver
*drv1
, *drv
;
514 BlockDriverState
*bs
;
516 ret
= bdrv_file_open(&bs
, filename
, 0);
522 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
523 if (bs
->sg
|| !bdrv_is_inserted(bs
)) {
525 drv
= bdrv_find_format("raw");
533 ret
= bdrv_pread(bs
, 0, buf
, sizeof(buf
));
542 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
543 if (drv1
->bdrv_probe
) {
544 score
= drv1
->bdrv_probe(buf
, ret
, filename
);
545 if (score
> score_max
) {
559 * Set the current 'total_sectors' value
561 static int refresh_total_sectors(BlockDriverState
*bs
, int64_t hint
)
563 BlockDriver
*drv
= bs
->drv
;
565 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
569 /* query actual device if possible, otherwise just trust the hint */
570 if (drv
->bdrv_getlength
) {
571 int64_t length
= drv
->bdrv_getlength(bs
);
575 hint
= length
>> BDRV_SECTOR_BITS
;
578 bs
->total_sectors
= hint
;
583 * Set open flags for a given cache mode
585 * Return 0 on success, -1 if the cache mode was invalid.
587 int bdrv_parse_cache_flags(const char *mode
, int *flags
)
589 *flags
&= ~BDRV_O_CACHE_MASK
;
591 if (!strcmp(mode
, "off") || !strcmp(mode
, "none")) {
592 *flags
|= BDRV_O_NOCACHE
| BDRV_O_CACHE_WB
;
593 } else if (!strcmp(mode
, "directsync")) {
594 *flags
|= BDRV_O_NOCACHE
;
595 } else if (!strcmp(mode
, "writeback")) {
596 *flags
|= BDRV_O_CACHE_WB
;
597 } else if (!strcmp(mode
, "unsafe")) {
598 *flags
|= BDRV_O_CACHE_WB
;
599 *flags
|= BDRV_O_NO_FLUSH
;
600 } else if (!strcmp(mode
, "writethrough")) {
601 /* this is the default */
610 * The copy-on-read flag is actually a reference count so multiple users may
611 * use the feature without worrying about clobbering its previous state.
612 * Copy-on-read stays enabled until all users have called to disable it.
614 void bdrv_enable_copy_on_read(BlockDriverState
*bs
)
619 void bdrv_disable_copy_on_read(BlockDriverState
*bs
)
621 assert(bs
->copy_on_read
> 0);
626 * Common part for opening disk images and files
628 static int bdrv_open_common(BlockDriverState
*bs
, const char *filename
,
629 int flags
, BlockDriver
*drv
)
634 assert(bs
->file
== NULL
);
636 trace_bdrv_open_common(bs
, filename
, flags
, drv
->format_name
);
638 bs
->open_flags
= flags
;
639 bs
->buffer_alignment
= 512;
641 assert(bs
->copy_on_read
== 0); /* bdrv_new() and bdrv_close() make it so */
642 if ((flags
& BDRV_O_RDWR
) && (flags
& BDRV_O_COPY_ON_READ
)) {
643 bdrv_enable_copy_on_read(bs
);
646 pstrcpy(bs
->filename
, sizeof(bs
->filename
), filename
);
648 if (use_bdrv_whitelist
&& !bdrv_is_whitelisted(drv
)) {
653 bs
->opaque
= g_malloc0(drv
->instance_size
);
655 bs
->enable_write_cache
= !!(flags
& BDRV_O_CACHE_WB
);
656 open_flags
= flags
| BDRV_O_CACHE_WB
;
659 * Clear flags that are internal to the block layer before opening the
662 open_flags
&= ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
665 * Snapshots should be writable.
667 if (bs
->is_temporary
) {
668 open_flags
|= BDRV_O_RDWR
;
671 bs
->read_only
= !(open_flags
& BDRV_O_RDWR
);
673 /* Open the image, either directly or using a protocol */
674 if (drv
->bdrv_file_open
) {
675 ret
= drv
->bdrv_file_open(bs
, filename
, open_flags
);
677 ret
= bdrv_file_open(&bs
->file
, filename
, open_flags
);
679 ret
= drv
->bdrv_open(bs
, open_flags
);
687 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
693 if (bs
->is_temporary
) {
701 bdrv_delete(bs
->file
);
711 * Opens a file using a protocol (file, host_device, nbd, ...)
713 int bdrv_file_open(BlockDriverState
**pbs
, const char *filename
, int flags
)
715 BlockDriverState
*bs
;
719 drv
= bdrv_find_protocol(filename
);
725 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
736 * Opens a disk image (raw, qcow2, vmdk, ...)
738 int bdrv_open(BlockDriverState
*bs
, const char *filename
, int flags
,
742 char tmp_filename
[PATH_MAX
];
744 if (flags
& BDRV_O_SNAPSHOT
) {
745 BlockDriverState
*bs1
;
748 BlockDriver
*bdrv_qcow2
;
749 QEMUOptionParameter
*options
;
750 char backing_filename
[PATH_MAX
];
752 /* if snapshot, we create a temporary backing file and open it
753 instead of opening 'filename' directly */
755 /* if there is a backing file, use it */
757 ret
= bdrv_open(bs1
, filename
, 0, drv
);
762 total_size
= bdrv_getlength(bs1
) & BDRV_SECTOR_MASK
;
764 if (bs1
->drv
&& bs1
->drv
->protocol_name
)
769 ret
= get_tmp_filename(tmp_filename
, sizeof(tmp_filename
));
774 /* Real path is meaningless for protocols */
776 snprintf(backing_filename
, sizeof(backing_filename
),
778 else if (!realpath(filename
, backing_filename
))
781 bdrv_qcow2
= bdrv_find_format("qcow2");
782 options
= parse_option_parameters("", bdrv_qcow2
->create_options
, NULL
);
784 set_option_parameter_int(options
, BLOCK_OPT_SIZE
, total_size
);
785 set_option_parameter(options
, BLOCK_OPT_BACKING_FILE
, backing_filename
);
787 set_option_parameter(options
, BLOCK_OPT_BACKING_FMT
,
791 ret
= bdrv_create(bdrv_qcow2
, tmp_filename
, options
);
792 free_option_parameters(options
);
797 filename
= tmp_filename
;
799 bs
->is_temporary
= 1;
802 /* Find the right image format driver */
804 ret
= find_image_format(filename
, &drv
);
808 goto unlink_and_fail
;
811 if (flags
& BDRV_O_RDWR
) {
812 flags
|= BDRV_O_ALLOW_RDWR
;
815 bs
->keep_read_only
= !(flags
& BDRV_O_ALLOW_RDWR
);
818 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
820 goto unlink_and_fail
;
823 /* If there is a backing file, use it */
824 if ((flags
& BDRV_O_NO_BACKING
) == 0 && bs
->backing_file
[0] != '\0') {
825 char backing_filename
[PATH_MAX
];
827 BlockDriver
*back_drv
= NULL
;
829 bs
->backing_hd
= bdrv_new("");
830 bdrv_get_full_backing_filename(bs
, backing_filename
,
831 sizeof(backing_filename
));
833 if (bs
->backing_format
[0] != '\0') {
834 back_drv
= bdrv_find_format(bs
->backing_format
);
837 /* backing files always opened read-only */
839 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
841 ret
= bdrv_open(bs
->backing_hd
, backing_filename
, back_flags
, back_drv
);
848 if (!bdrv_key_required(bs
)) {
849 bdrv_dev_change_media_cb(bs
, true);
852 /* throttling disk I/O limits */
853 if (bs
->io_limits_enabled
) {
854 bdrv_io_limits_enable(bs
);
860 if (bs
->is_temporary
) {
866 typedef struct BlockReopenQueueEntry
{
868 BDRVReopenState state
;
869 QSIMPLEQ_ENTRY(BlockReopenQueueEntry
) entry
;
870 } BlockReopenQueueEntry
;
873 * Adds a BlockDriverState to a simple queue for an atomic, transactional
874 * reopen of multiple devices.
876 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLEQ_INIT
877 * already performed, or alternatively may be NULL, in which case a new BlockReopenQueue will
878 * be created and initialized. This newly created BlockReopenQueue should be
879 * passed back in for subsequent calls that are intended to be of the same
882 * bs is the BlockDriverState to add to the reopen queue.
884 * flags contains the open flags for the associated bs
886 * returns a pointer to bs_queue, which is either the newly allocated
887 * bs_queue, or the existing bs_queue being used.
890 BlockReopenQueue
*bdrv_reopen_queue(BlockReopenQueue
*bs_queue
,
891 BlockDriverState
*bs
, int flags
)
895 BlockReopenQueueEntry
*bs_entry
;
896 if (bs_queue
== NULL
) {
897 bs_queue
= g_new0(BlockReopenQueue
, 1);
898 QSIMPLEQ_INIT(bs_queue
);
902 bdrv_reopen_queue(bs_queue
, bs
->file
, flags
);
905 bs_entry
= g_new0(BlockReopenQueueEntry
, 1);
906 QSIMPLEQ_INSERT_TAIL(bs_queue
, bs_entry
, entry
);
908 bs_entry
->state
.bs
= bs
;
909 bs_entry
->state
.flags
= flags
;
915 * Reopen multiple BlockDriverStates atomically & transactionally.
917 * The queue passed in (bs_queue) must have been built up previously
918 * via bdrv_reopen_queue().
920 * Reopens all BDS specified in the queue, with the appropriate
921 * flags. All devices are prepared for reopen, and failure of any
922 * device will cause all device changes to be abandoned, and intermediate
925 * If all devices prepare successfully, then the changes are committed
929 int bdrv_reopen_multiple(BlockReopenQueue
*bs_queue
, Error
**errp
)
932 BlockReopenQueueEntry
*bs_entry
, *next
;
933 Error
*local_err
= NULL
;
935 assert(bs_queue
!= NULL
);
939 QSIMPLEQ_FOREACH(bs_entry
, bs_queue
, entry
) {
940 if (bdrv_reopen_prepare(&bs_entry
->state
, bs_queue
, &local_err
)) {
941 error_propagate(errp
, local_err
);
944 bs_entry
->prepared
= true;
947 /* If we reach this point, we have success and just need to apply the
950 QSIMPLEQ_FOREACH(bs_entry
, bs_queue
, entry
) {
951 bdrv_reopen_commit(&bs_entry
->state
);
957 QSIMPLEQ_FOREACH_SAFE(bs_entry
, bs_queue
, entry
, next
) {
958 if (ret
&& bs_entry
->prepared
) {
959 bdrv_reopen_abort(&bs_entry
->state
);
968 /* Reopen a single BlockDriverState with the specified flags. */
969 int bdrv_reopen(BlockDriverState
*bs
, int bdrv_flags
, Error
**errp
)
972 Error
*local_err
= NULL
;
973 BlockReopenQueue
*queue
= bdrv_reopen_queue(NULL
, bs
, bdrv_flags
);
975 ret
= bdrv_reopen_multiple(queue
, &local_err
);
976 if (local_err
!= NULL
) {
977 error_propagate(errp
, local_err
);
984 * Prepares a BlockDriverState for reopen. All changes are staged in the
985 * 'opaque' field of the BDRVReopenState, which is used and allocated by
986 * the block driver layer .bdrv_reopen_prepare()
988 * bs is the BlockDriverState to reopen
989 * flags are the new open flags
990 * queue is the reopen queue
992 * Returns 0 on success, non-zero on error. On error errp will be set
995 * On failure, bdrv_reopen_abort() will be called to clean up any data.
996 * It is the responsibility of the caller to then call the abort() or
997 * commit() for any other BDS that have been left in a prepare() state
1000 int bdrv_reopen_prepare(BDRVReopenState
*reopen_state
, BlockReopenQueue
*queue
,
1004 Error
*local_err
= NULL
;
1007 assert(reopen_state
!= NULL
);
1008 assert(reopen_state
->bs
->drv
!= NULL
);
1009 drv
= reopen_state
->bs
->drv
;
1011 /* if we are to stay read-only, do not allow permission change
1013 if (!(reopen_state
->bs
->open_flags
& BDRV_O_ALLOW_RDWR
) &&
1014 reopen_state
->flags
& BDRV_O_RDWR
) {
1015 error_set(errp
, QERR_DEVICE_IS_READ_ONLY
,
1016 reopen_state
->bs
->device_name
);
1021 ret
= bdrv_flush(reopen_state
->bs
);
1023 error_set(errp
, ERROR_CLASS_GENERIC_ERROR
, "Error (%s) flushing drive",
1028 if (drv
->bdrv_reopen_prepare
) {
1029 ret
= drv
->bdrv_reopen_prepare(reopen_state
, queue
, &local_err
);
1031 if (local_err
!= NULL
) {
1032 error_propagate(errp
, local_err
);
1034 error_set(errp
, QERR_OPEN_FILE_FAILED
,
1035 reopen_state
->bs
->filename
);
1040 /* It is currently mandatory to have a bdrv_reopen_prepare()
1041 * handler for each supported drv. */
1042 error_set(errp
, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED
,
1043 drv
->format_name
, reopen_state
->bs
->device_name
,
1044 "reopening of file");
1056 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1057 * makes them final by swapping the staging BlockDriverState contents into
1058 * the active BlockDriverState contents.
1060 void bdrv_reopen_commit(BDRVReopenState
*reopen_state
)
1064 assert(reopen_state
!= NULL
);
1065 drv
= reopen_state
->bs
->drv
;
1066 assert(drv
!= NULL
);
1068 /* If there are any driver level actions to take */
1069 if (drv
->bdrv_reopen_commit
) {
1070 drv
->bdrv_reopen_commit(reopen_state
);
1073 /* set BDS specific flags now */
1074 reopen_state
->bs
->open_flags
= reopen_state
->flags
;
1075 reopen_state
->bs
->enable_write_cache
= !!(reopen_state
->flags
&
1077 reopen_state
->bs
->read_only
= !(reopen_state
->flags
& BDRV_O_RDWR
);
1081 * Abort the reopen, and delete and free the staged changes in
1084 void bdrv_reopen_abort(BDRVReopenState
*reopen_state
)
1088 assert(reopen_state
!= NULL
);
1089 drv
= reopen_state
->bs
->drv
;
1090 assert(drv
!= NULL
);
1092 if (drv
->bdrv_reopen_abort
) {
1093 drv
->bdrv_reopen_abort(reopen_state
);
1098 void bdrv_close(BlockDriverState
*bs
)
1103 block_job_cancel_sync(bs
->job
);
1107 if (bs
== bs_snapshots
) {
1108 bs_snapshots
= NULL
;
1110 if (bs
->backing_hd
) {
1111 bdrv_delete(bs
->backing_hd
);
1112 bs
->backing_hd
= NULL
;
1114 bs
->drv
->bdrv_close(bs
);
1117 if (bs
->is_temporary
) {
1118 unlink(bs
->filename
);
1123 bs
->copy_on_read
= 0;
1124 bs
->backing_file
[0] = '\0';
1125 bs
->backing_format
[0] = '\0';
1126 bs
->total_sectors
= 0;
1132 if (bs
->file
!= NULL
) {
1133 bdrv_delete(bs
->file
);
1138 bdrv_dev_change_media_cb(bs
, false);
1140 /*throttling disk I/O limits*/
1141 if (bs
->io_limits_enabled
) {
1142 bdrv_io_limits_disable(bs
);
1146 void bdrv_close_all(void)
1148 BlockDriverState
*bs
;
1150 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1156 * Wait for pending requests to complete across all BlockDriverStates
1158 * This function does not flush data to disk, use bdrv_flush_all() for that
1159 * after calling this function.
1161 * Note that completion of an asynchronous I/O operation can trigger any
1162 * number of other I/O operations on other devices---for example a coroutine
1163 * can be arbitrarily complex and a constant flow of I/O can come until the
1164 * coroutine is complete. Because of this, it is not possible to have a
1165 * function to drain a single device's I/O queue.
1167 void bdrv_drain_all(void)
1169 BlockDriverState
*bs
;
1173 busy
= qemu_aio_wait();
1175 /* FIXME: We do not have timer support here, so this is effectively
1178 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1179 if (!qemu_co_queue_empty(&bs
->throttled_reqs
)) {
1180 qemu_co_queue_restart_all(&bs
->throttled_reqs
);
1186 /* If requests are still pending there is a bug somewhere */
1187 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1188 assert(QLIST_EMPTY(&bs
->tracked_requests
));
1189 assert(qemu_co_queue_empty(&bs
->throttled_reqs
));
1193 /* make a BlockDriverState anonymous by removing it from the bdrv_states list.
1194 Also, set device_name to the empty string to prevent a double remove */
1195 void bdrv_make_anon(BlockDriverState
*bs
)
1197 if (bs
->device_name
[0] != '\0') {
1198 QTAILQ_REMOVE(&bdrv_states
, bs
, list
);
1200 bs
->device_name
[0] = '\0';
1203 static void bdrv_rebind(BlockDriverState
*bs
)
1205 if (bs
->drv
&& bs
->drv
->bdrv_rebind
) {
1206 bs
->drv
->bdrv_rebind(bs
);
1210 static void bdrv_move_feature_fields(BlockDriverState
*bs_dest
,
1211 BlockDriverState
*bs_src
)
1213 /* move some fields that need to stay attached to the device */
1214 bs_dest
->open_flags
= bs_src
->open_flags
;
1217 bs_dest
->dev_ops
= bs_src
->dev_ops
;
1218 bs_dest
->dev_opaque
= bs_src
->dev_opaque
;
1219 bs_dest
->dev
= bs_src
->dev
;
1220 bs_dest
->buffer_alignment
= bs_src
->buffer_alignment
;
1221 bs_dest
->copy_on_read
= bs_src
->copy_on_read
;
1223 bs_dest
->enable_write_cache
= bs_src
->enable_write_cache
;
1225 /* i/o timing parameters */
1226 bs_dest
->slice_time
= bs_src
->slice_time
;
1227 bs_dest
->slice_start
= bs_src
->slice_start
;
1228 bs_dest
->slice_end
= bs_src
->slice_end
;
1229 bs_dest
->io_limits
= bs_src
->io_limits
;
1230 bs_dest
->io_base
= bs_src
->io_base
;
1231 bs_dest
->throttled_reqs
= bs_src
->throttled_reqs
;
1232 bs_dest
->block_timer
= bs_src
->block_timer
;
1233 bs_dest
->io_limits_enabled
= bs_src
->io_limits_enabled
;
1236 bs_dest
->on_read_error
= bs_src
->on_read_error
;
1237 bs_dest
->on_write_error
= bs_src
->on_write_error
;
1240 bs_dest
->iostatus_enabled
= bs_src
->iostatus_enabled
;
1241 bs_dest
->iostatus
= bs_src
->iostatus
;
1244 bs_dest
->dirty_count
= bs_src
->dirty_count
;
1245 bs_dest
->dirty_bitmap
= bs_src
->dirty_bitmap
;
1248 bs_dest
->in_use
= bs_src
->in_use
;
1249 bs_dest
->job
= bs_src
->job
;
1251 /* keep the same entry in bdrv_states */
1252 pstrcpy(bs_dest
->device_name
, sizeof(bs_dest
->device_name
),
1253 bs_src
->device_name
);
1254 bs_dest
->list
= bs_src
->list
;
1258 * Swap bs contents for two image chains while they are live,
1259 * while keeping required fields on the BlockDriverState that is
1260 * actually attached to a device.
1262 * This will modify the BlockDriverState fields, and swap contents
1263 * between bs_new and bs_old. Both bs_new and bs_old are modified.
1265 * bs_new is required to be anonymous.
1267 * This function does not create any image files.
1269 void bdrv_swap(BlockDriverState
*bs_new
, BlockDriverState
*bs_old
)
1271 BlockDriverState tmp
;
1273 /* bs_new must be anonymous and shouldn't have anything fancy enabled */
1274 assert(bs_new
->device_name
[0] == '\0');
1275 assert(bs_new
->dirty_bitmap
== NULL
);
1276 assert(bs_new
->job
== NULL
);
1277 assert(bs_new
->dev
== NULL
);
1278 assert(bs_new
->in_use
== 0);
1279 assert(bs_new
->io_limits_enabled
== false);
1280 assert(bs_new
->block_timer
== NULL
);
1286 /* there are some fields that should not be swapped, move them back */
1287 bdrv_move_feature_fields(&tmp
, bs_old
);
1288 bdrv_move_feature_fields(bs_old
, bs_new
);
1289 bdrv_move_feature_fields(bs_new
, &tmp
);
1291 /* bs_new shouldn't be in bdrv_states even after the swap! */
1292 assert(bs_new
->device_name
[0] == '\0');
1294 /* Check a few fields that should remain attached to the device */
1295 assert(bs_new
->dev
== NULL
);
1296 assert(bs_new
->job
== NULL
);
1297 assert(bs_new
->in_use
== 0);
1298 assert(bs_new
->io_limits_enabled
== false);
1299 assert(bs_new
->block_timer
== NULL
);
1301 bdrv_rebind(bs_new
);
1302 bdrv_rebind(bs_old
);
1306 * Add new bs contents at the top of an image chain while the chain is
1307 * live, while keeping required fields on the top layer.
1309 * This will modify the BlockDriverState fields, and swap contents
1310 * between bs_new and bs_top. Both bs_new and bs_top are modified.
1312 * bs_new is required to be anonymous.
1314 * This function does not create any image files.
1316 void bdrv_append(BlockDriverState
*bs_new
, BlockDriverState
*bs_top
)
1318 bdrv_swap(bs_new
, bs_top
);
1320 /* The contents of 'tmp' will become bs_top, as we are
1321 * swapping bs_new and bs_top contents. */
1322 bs_top
->backing_hd
= bs_new
;
1323 bs_top
->open_flags
&= ~BDRV_O_NO_BACKING
;
1324 pstrcpy(bs_top
->backing_file
, sizeof(bs_top
->backing_file
),
1326 pstrcpy(bs_top
->backing_format
, sizeof(bs_top
->backing_format
),
1327 bs_new
->drv
? bs_new
->drv
->format_name
: "");
1330 void bdrv_delete(BlockDriverState
*bs
)
1334 assert(!bs
->in_use
);
1336 /* remove from list, if necessary */
1341 assert(bs
!= bs_snapshots
);
1345 int bdrv_attach_dev(BlockDriverState
*bs
, void *dev
)
1346 /* TODO change to DeviceState *dev when all users are qdevified */
1352 bdrv_iostatus_reset(bs
);
1356 /* TODO qdevified devices don't use this, remove when devices are qdevified */
1357 void bdrv_attach_dev_nofail(BlockDriverState
*bs
, void *dev
)
1359 if (bdrv_attach_dev(bs
, dev
) < 0) {
1364 void bdrv_detach_dev(BlockDriverState
*bs
, void *dev
)
1365 /* TODO change to DeviceState *dev when all users are qdevified */
1367 assert(bs
->dev
== dev
);
1370 bs
->dev_opaque
= NULL
;
1371 bs
->buffer_alignment
= 512;
1374 /* TODO change to return DeviceState * when all users are qdevified */
1375 void *bdrv_get_attached_dev(BlockDriverState
*bs
)
1380 void bdrv_set_dev_ops(BlockDriverState
*bs
, const BlockDevOps
*ops
,
1384 bs
->dev_opaque
= opaque
;
1385 if (bdrv_dev_has_removable_media(bs
) && bs
== bs_snapshots
) {
1386 bs_snapshots
= NULL
;
1390 void bdrv_emit_qmp_error_event(const BlockDriverState
*bdrv
,
1391 BlockQMPEventAction action
, int is_read
)
1394 const char *action_str
;
1397 case BDRV_ACTION_REPORT
:
1398 action_str
= "report";
1400 case BDRV_ACTION_IGNORE
:
1401 action_str
= "ignore";
1403 case BDRV_ACTION_STOP
:
1404 action_str
= "stop";
1410 data
= qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1413 is_read
? "read" : "write");
1414 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR
, data
);
1416 qobject_decref(data
);
1419 static void bdrv_emit_qmp_eject_event(BlockDriverState
*bs
, bool ejected
)
1423 data
= qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1424 bdrv_get_device_name(bs
), ejected
);
1425 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED
, data
);
1427 qobject_decref(data
);
1430 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
)
1432 if (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
) {
1433 bool tray_was_closed
= !bdrv_dev_is_tray_open(bs
);
1434 bs
->dev_ops
->change_media_cb(bs
->dev_opaque
, load
);
1435 if (tray_was_closed
) {
1437 bdrv_emit_qmp_eject_event(bs
, true);
1441 bdrv_emit_qmp_eject_event(bs
, false);
1446 bool bdrv_dev_has_removable_media(BlockDriverState
*bs
)
1448 return !bs
->dev
|| (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
);
1451 void bdrv_dev_eject_request(BlockDriverState
*bs
, bool force
)
1453 if (bs
->dev_ops
&& bs
->dev_ops
->eject_request_cb
) {
1454 bs
->dev_ops
->eject_request_cb(bs
->dev_opaque
, force
);
1458 bool bdrv_dev_is_tray_open(BlockDriverState
*bs
)
1460 if (bs
->dev_ops
&& bs
->dev_ops
->is_tray_open
) {
1461 return bs
->dev_ops
->is_tray_open(bs
->dev_opaque
);
1466 static void bdrv_dev_resize_cb(BlockDriverState
*bs
)
1468 if (bs
->dev_ops
&& bs
->dev_ops
->resize_cb
) {
1469 bs
->dev_ops
->resize_cb(bs
->dev_opaque
);
1473 bool bdrv_dev_is_medium_locked(BlockDriverState
*bs
)
1475 if (bs
->dev_ops
&& bs
->dev_ops
->is_medium_locked
) {
1476 return bs
->dev_ops
->is_medium_locked(bs
->dev_opaque
);
1482 * Run consistency checks on an image
1484 * Returns 0 if the check could be completed (it doesn't mean that the image is
1485 * free of errors) or -errno when an internal error occurred. The results of the
1486 * check are stored in res.
1488 int bdrv_check(BlockDriverState
*bs
, BdrvCheckResult
*res
, BdrvCheckMode fix
)
1490 if (bs
->drv
->bdrv_check
== NULL
) {
1494 memset(res
, 0, sizeof(*res
));
1495 return bs
->drv
->bdrv_check(bs
, res
, fix
);
1498 #define COMMIT_BUF_SECTORS 2048
1500 /* commit COW file into the raw image */
1501 int bdrv_commit(BlockDriverState
*bs
)
1503 BlockDriver
*drv
= bs
->drv
;
1504 BlockDriver
*backing_drv
;
1505 int64_t sector
, total_sectors
;
1506 int n
, ro
, open_flags
;
1507 int ret
= 0, rw_ret
= 0;
1509 char filename
[1024];
1510 BlockDriverState
*bs_rw
, *bs_ro
;
1515 if (!bs
->backing_hd
) {
1519 if (bs
->backing_hd
->keep_read_only
) {
1523 if (bdrv_in_use(bs
) || bdrv_in_use(bs
->backing_hd
)) {
1527 backing_drv
= bs
->backing_hd
->drv
;
1528 ro
= bs
->backing_hd
->read_only
;
1529 strncpy(filename
, bs
->backing_hd
->filename
, sizeof(filename
));
1530 open_flags
= bs
->backing_hd
->open_flags
;
1534 bdrv_delete(bs
->backing_hd
);
1535 bs
->backing_hd
= NULL
;
1536 bs_rw
= bdrv_new("");
1537 rw_ret
= bdrv_open(bs_rw
, filename
, open_flags
| BDRV_O_RDWR
,
1541 /* try to re-open read-only */
1542 bs_ro
= bdrv_new("");
1543 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
1547 /* drive not functional anymore */
1551 bs
->backing_hd
= bs_ro
;
1554 bs
->backing_hd
= bs_rw
;
1557 total_sectors
= bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
;
1558 buf
= g_malloc(COMMIT_BUF_SECTORS
* BDRV_SECTOR_SIZE
);
1560 for (sector
= 0; sector
< total_sectors
; sector
+= n
) {
1561 if (bdrv_is_allocated(bs
, sector
, COMMIT_BUF_SECTORS
, &n
)) {
1563 if (bdrv_read(bs
, sector
, buf
, n
) != 0) {
1568 if (bdrv_write(bs
->backing_hd
, sector
, buf
, n
) != 0) {
1575 if (drv
->bdrv_make_empty
) {
1576 ret
= drv
->bdrv_make_empty(bs
);
1581 * Make sure all data we wrote to the backing device is actually
1585 bdrv_flush(bs
->backing_hd
);
1592 bdrv_delete(bs
->backing_hd
);
1593 bs
->backing_hd
= NULL
;
1594 bs_ro
= bdrv_new("");
1595 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
1599 /* drive not functional anymore */
1603 bs
->backing_hd
= bs_ro
;
1604 bs
->backing_hd
->keep_read_only
= 0;
1610 int bdrv_commit_all(void)
1612 BlockDriverState
*bs
;
1614 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1615 int ret
= bdrv_commit(bs
);
1623 struct BdrvTrackedRequest
{
1624 BlockDriverState
*bs
;
1628 QLIST_ENTRY(BdrvTrackedRequest
) list
;
1629 Coroutine
*co
; /* owner, used for deadlock detection */
1630 CoQueue wait_queue
; /* coroutines blocked on this request */
1634 * Remove an active request from the tracked requests list
1636 * This function should be called when a tracked request is completing.
1638 static void tracked_request_end(BdrvTrackedRequest
*req
)
1640 QLIST_REMOVE(req
, list
);
1641 qemu_co_queue_restart_all(&req
->wait_queue
);
1645 * Add an active request to the tracked requests list
1647 static void tracked_request_begin(BdrvTrackedRequest
*req
,
1648 BlockDriverState
*bs
,
1650 int nb_sectors
, bool is_write
)
1652 *req
= (BdrvTrackedRequest
){
1654 .sector_num
= sector_num
,
1655 .nb_sectors
= nb_sectors
,
1656 .is_write
= is_write
,
1657 .co
= qemu_coroutine_self(),
1660 qemu_co_queue_init(&req
->wait_queue
);
1662 QLIST_INSERT_HEAD(&bs
->tracked_requests
, req
, list
);
1666 * Round a region to cluster boundaries
1668 static void round_to_clusters(BlockDriverState
*bs
,
1669 int64_t sector_num
, int nb_sectors
,
1670 int64_t *cluster_sector_num
,
1671 int *cluster_nb_sectors
)
1673 BlockDriverInfo bdi
;
1675 if (bdrv_get_info(bs
, &bdi
) < 0 || bdi
.cluster_size
== 0) {
1676 *cluster_sector_num
= sector_num
;
1677 *cluster_nb_sectors
= nb_sectors
;
1679 int64_t c
= bdi
.cluster_size
/ BDRV_SECTOR_SIZE
;
1680 *cluster_sector_num
= QEMU_ALIGN_DOWN(sector_num
, c
);
1681 *cluster_nb_sectors
= QEMU_ALIGN_UP(sector_num
- *cluster_sector_num
+
1686 static bool tracked_request_overlaps(BdrvTrackedRequest
*req
,
1687 int64_t sector_num
, int nb_sectors
) {
1689 if (sector_num
>= req
->sector_num
+ req
->nb_sectors
) {
1693 if (req
->sector_num
>= sector_num
+ nb_sectors
) {
1699 static void coroutine_fn
wait_for_overlapping_requests(BlockDriverState
*bs
,
1700 int64_t sector_num
, int nb_sectors
)
1702 BdrvTrackedRequest
*req
;
1703 int64_t cluster_sector_num
;
1704 int cluster_nb_sectors
;
1707 /* If we touch the same cluster it counts as an overlap. This guarantees
1708 * that allocating writes will be serialized and not race with each other
1709 * for the same cluster. For example, in copy-on-read it ensures that the
1710 * CoR read and write operations are atomic and guest writes cannot
1711 * interleave between them.
1713 round_to_clusters(bs
, sector_num
, nb_sectors
,
1714 &cluster_sector_num
, &cluster_nb_sectors
);
1718 QLIST_FOREACH(req
, &bs
->tracked_requests
, list
) {
1719 if (tracked_request_overlaps(req
, cluster_sector_num
,
1720 cluster_nb_sectors
)) {
1721 /* Hitting this means there was a reentrant request, for
1722 * example, a block driver issuing nested requests. This must
1723 * never happen since it means deadlock.
1725 assert(qemu_coroutine_self() != req
->co
);
1727 qemu_co_queue_wait(&req
->wait_queue
);
1738 * -EINVAL - backing format specified, but no file
1739 * -ENOSPC - can't update the backing file because no space is left in the
1741 * -ENOTSUP - format driver doesn't support changing the backing file
1743 int bdrv_change_backing_file(BlockDriverState
*bs
,
1744 const char *backing_file
, const char *backing_fmt
)
1746 BlockDriver
*drv
= bs
->drv
;
1749 /* Backing file format doesn't make sense without a backing file */
1750 if (backing_fmt
&& !backing_file
) {
1754 if (drv
->bdrv_change_backing_file
!= NULL
) {
1755 ret
= drv
->bdrv_change_backing_file(bs
, backing_file
, backing_fmt
);
1761 pstrcpy(bs
->backing_file
, sizeof(bs
->backing_file
), backing_file
?: "");
1762 pstrcpy(bs
->backing_format
, sizeof(bs
->backing_format
), backing_fmt
?: "");
1767 static int bdrv_check_byte_request(BlockDriverState
*bs
, int64_t offset
,
1772 if (!bdrv_is_inserted(bs
))
1778 len
= bdrv_getlength(bs
);
1783 if ((offset
> len
) || (len
- offset
< size
))
1789 static int bdrv_check_request(BlockDriverState
*bs
, int64_t sector_num
,
1792 return bdrv_check_byte_request(bs
, sector_num
* BDRV_SECTOR_SIZE
,
1793 nb_sectors
* BDRV_SECTOR_SIZE
);
1796 typedef struct RwCo
{
1797 BlockDriverState
*bs
;
1805 static void coroutine_fn
bdrv_rw_co_entry(void *opaque
)
1807 RwCo
*rwco
= opaque
;
1809 if (!rwco
->is_write
) {
1810 rwco
->ret
= bdrv_co_do_readv(rwco
->bs
, rwco
->sector_num
,
1811 rwco
->nb_sectors
, rwco
->qiov
, 0);
1813 rwco
->ret
= bdrv_co_do_writev(rwco
->bs
, rwco
->sector_num
,
1814 rwco
->nb_sectors
, rwco
->qiov
, 0);
1819 * Process a synchronous request using coroutines
1821 static int bdrv_rw_co(BlockDriverState
*bs
, int64_t sector_num
, uint8_t *buf
,
1822 int nb_sectors
, bool is_write
)
1825 struct iovec iov
= {
1826 .iov_base
= (void *)buf
,
1827 .iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
,
1832 .sector_num
= sector_num
,
1833 .nb_sectors
= nb_sectors
,
1835 .is_write
= is_write
,
1839 qemu_iovec_init_external(&qiov
, &iov
, 1);
1842 * In sync call context, when the vcpu is blocked, this throttling timer
1843 * will not fire; so the I/O throttling function has to be disabled here
1844 * if it has been enabled.
1846 if (bs
->io_limits_enabled
) {
1847 fprintf(stderr
, "Disabling I/O throttling on '%s' due "
1848 "to synchronous I/O.\n", bdrv_get_device_name(bs
));
1849 bdrv_io_limits_disable(bs
);
1852 if (qemu_in_coroutine()) {
1853 /* Fast-path if already in coroutine context */
1854 bdrv_rw_co_entry(&rwco
);
1856 co
= qemu_coroutine_create(bdrv_rw_co_entry
);
1857 qemu_coroutine_enter(co
, &rwco
);
1858 while (rwco
.ret
== NOT_DONE
) {
1865 /* return < 0 if error. See bdrv_write() for the return codes */
1866 int bdrv_read(BlockDriverState
*bs
, int64_t sector_num
,
1867 uint8_t *buf
, int nb_sectors
)
1869 return bdrv_rw_co(bs
, sector_num
, buf
, nb_sectors
, false);
1872 /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
1873 int bdrv_read_unthrottled(BlockDriverState
*bs
, int64_t sector_num
,
1874 uint8_t *buf
, int nb_sectors
)
1879 enabled
= bs
->io_limits_enabled
;
1880 bs
->io_limits_enabled
= false;
1881 ret
= bdrv_read(bs
, 0, buf
, 1);
1882 bs
->io_limits_enabled
= enabled
;
1886 #define BITS_PER_LONG (sizeof(unsigned long) * 8)
1888 static void set_dirty_bitmap(BlockDriverState
*bs
, int64_t sector_num
,
1889 int nb_sectors
, int dirty
)
1892 unsigned long val
, idx
, bit
;
1894 start
= sector_num
/ BDRV_SECTORS_PER_DIRTY_CHUNK
;
1895 end
= (sector_num
+ nb_sectors
- 1) / BDRV_SECTORS_PER_DIRTY_CHUNK
;
1897 for (; start
<= end
; start
++) {
1898 idx
= start
/ BITS_PER_LONG
;
1899 bit
= start
% BITS_PER_LONG
;
1900 val
= bs
->dirty_bitmap
[idx
];
1902 if (!(val
& (1UL << bit
))) {
1907 if (val
& (1UL << bit
)) {
1909 val
&= ~(1UL << bit
);
1912 bs
->dirty_bitmap
[idx
] = val
;
1916 /* Return < 0 if error. Important errors are:
1917 -EIO generic I/O error (may happen for all errors)
1918 -ENOMEDIUM No media inserted.
1919 -EINVAL Invalid sector number or nb_sectors
1920 -EACCES Trying to write a read-only device
1922 int bdrv_write(BlockDriverState
*bs
, int64_t sector_num
,
1923 const uint8_t *buf
, int nb_sectors
)
1925 return bdrv_rw_co(bs
, sector_num
, (uint8_t *)buf
, nb_sectors
, true);
1928 int bdrv_pread(BlockDriverState
*bs
, int64_t offset
,
1929 void *buf
, int count1
)
1931 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1932 int len
, nb_sectors
, count
;
1937 /* first read to align to sector start */
1938 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1941 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1943 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1945 memcpy(buf
, tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), len
);
1953 /* read the sectors "in place" */
1954 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1955 if (nb_sectors
> 0) {
1956 if ((ret
= bdrv_read(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1958 sector_num
+= nb_sectors
;
1959 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1964 /* add data from the last sector */
1966 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1968 memcpy(buf
, tmp_buf
, count
);
1973 int bdrv_pwrite(BlockDriverState
*bs
, int64_t offset
,
1974 const void *buf
, int count1
)
1976 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1977 int len
, nb_sectors
, count
;
1982 /* first write to align to sector start */
1983 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1986 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1988 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1990 memcpy(tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), buf
, len
);
1991 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
2000 /* write the sectors "in place" */
2001 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
2002 if (nb_sectors
> 0) {
2003 if ((ret
= bdrv_write(bs
, sector_num
, buf
, nb_sectors
)) < 0)
2005 sector_num
+= nb_sectors
;
2006 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
2011 /* add data from the last sector */
2013 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
2015 memcpy(tmp_buf
, buf
, count
);
2016 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
2023 * Writes to the file and ensures that no writes are reordered across this
2024 * request (acts as a barrier)
2026 * Returns 0 on success, -errno in error cases.
2028 int bdrv_pwrite_sync(BlockDriverState
*bs
, int64_t offset
,
2029 const void *buf
, int count
)
2033 ret
= bdrv_pwrite(bs
, offset
, buf
, count
);
2038 /* No flush needed for cache modes that already do it */
2039 if (bs
->enable_write_cache
) {
2046 static int coroutine_fn
bdrv_co_do_copy_on_readv(BlockDriverState
*bs
,
2047 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
2049 /* Perform I/O through a temporary buffer so that users who scribble over
2050 * their read buffer while the operation is in progress do not end up
2051 * modifying the image file. This is critical for zero-copy guest I/O
2052 * where anything might happen inside guest memory.
2054 void *bounce_buffer
;
2056 BlockDriver
*drv
= bs
->drv
;
2058 QEMUIOVector bounce_qiov
;
2059 int64_t cluster_sector_num
;
2060 int cluster_nb_sectors
;
2064 /* Cover entire cluster so no additional backing file I/O is required when
2065 * allocating cluster in the image file.
2067 round_to_clusters(bs
, sector_num
, nb_sectors
,
2068 &cluster_sector_num
, &cluster_nb_sectors
);
2070 trace_bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
,
2071 cluster_sector_num
, cluster_nb_sectors
);
2073 iov
.iov_len
= cluster_nb_sectors
* BDRV_SECTOR_SIZE
;
2074 iov
.iov_base
= bounce_buffer
= qemu_blockalign(bs
, iov
.iov_len
);
2075 qemu_iovec_init_external(&bounce_qiov
, &iov
, 1);
2077 ret
= drv
->bdrv_co_readv(bs
, cluster_sector_num
, cluster_nb_sectors
,
2083 if (drv
->bdrv_co_write_zeroes
&&
2084 buffer_is_zero(bounce_buffer
, iov
.iov_len
)) {
2085 ret
= bdrv_co_do_write_zeroes(bs
, cluster_sector_num
,
2086 cluster_nb_sectors
);
2088 /* This does not change the data on the disk, it is not necessary
2089 * to flush even in cache=writethrough mode.
2091 ret
= drv
->bdrv_co_writev(bs
, cluster_sector_num
, cluster_nb_sectors
,
2096 /* It might be okay to ignore write errors for guest requests. If this
2097 * is a deliberate copy-on-read then we don't want to ignore the error.
2098 * Simply report it in all cases.
2103 skip_bytes
= (sector_num
- cluster_sector_num
) * BDRV_SECTOR_SIZE
;
2104 qemu_iovec_from_buf(qiov
, 0, bounce_buffer
+ skip_bytes
,
2105 nb_sectors
* BDRV_SECTOR_SIZE
);
2108 qemu_vfree(bounce_buffer
);
2113 * Handle a read request in coroutine context
2115 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
2116 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
2117 BdrvRequestFlags flags
)
2119 BlockDriver
*drv
= bs
->drv
;
2120 BdrvTrackedRequest req
;
2126 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
2130 /* throttling disk read I/O */
2131 if (bs
->io_limits_enabled
) {
2132 bdrv_io_limits_intercept(bs
, false, nb_sectors
);
2135 if (bs
->copy_on_read
) {
2136 flags
|= BDRV_REQ_COPY_ON_READ
;
2138 if (flags
& BDRV_REQ_COPY_ON_READ
) {
2139 bs
->copy_on_read_in_flight
++;
2142 if (bs
->copy_on_read_in_flight
) {
2143 wait_for_overlapping_requests(bs
, sector_num
, nb_sectors
);
2146 tracked_request_begin(&req
, bs
, sector_num
, nb_sectors
, false);
2148 if (flags
& BDRV_REQ_COPY_ON_READ
) {
2151 ret
= bdrv_co_is_allocated(bs
, sector_num
, nb_sectors
, &pnum
);
2156 if (!ret
|| pnum
!= nb_sectors
) {
2157 ret
= bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
, qiov
);
2162 ret
= drv
->bdrv_co_readv(bs
, sector_num
, nb_sectors
, qiov
);
2165 tracked_request_end(&req
);
2167 if (flags
& BDRV_REQ_COPY_ON_READ
) {
2168 bs
->copy_on_read_in_flight
--;
2174 int coroutine_fn
bdrv_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
2175 int nb_sectors
, QEMUIOVector
*qiov
)
2177 trace_bdrv_co_readv(bs
, sector_num
, nb_sectors
);
2179 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
, 0);
2182 int coroutine_fn
bdrv_co_copy_on_readv(BlockDriverState
*bs
,
2183 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
2185 trace_bdrv_co_copy_on_readv(bs
, sector_num
, nb_sectors
);
2187 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
,
2188 BDRV_REQ_COPY_ON_READ
);
2191 static int coroutine_fn
bdrv_co_do_write_zeroes(BlockDriverState
*bs
,
2192 int64_t sector_num
, int nb_sectors
)
2194 BlockDriver
*drv
= bs
->drv
;
2199 /* TODO Emulate only part of misaligned requests instead of letting block
2200 * drivers return -ENOTSUP and emulate everything */
2202 /* First try the efficient write zeroes operation */
2203 if (drv
->bdrv_co_write_zeroes
) {
2204 ret
= drv
->bdrv_co_write_zeroes(bs
, sector_num
, nb_sectors
);
2205 if (ret
!= -ENOTSUP
) {
2210 /* Fall back to bounce buffer if write zeroes is unsupported */
2211 iov
.iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
;
2212 iov
.iov_base
= qemu_blockalign(bs
, iov
.iov_len
);
2213 memset(iov
.iov_base
, 0, iov
.iov_len
);
2214 qemu_iovec_init_external(&qiov
, &iov
, 1);
2216 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, &qiov
);
2218 qemu_vfree(iov
.iov_base
);
2223 * Handle a write request in coroutine context
2225 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
2226 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
2227 BdrvRequestFlags flags
)
2229 BlockDriver
*drv
= bs
->drv
;
2230 BdrvTrackedRequest req
;
2236 if (bs
->read_only
) {
2239 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
2243 /* throttling disk write I/O */
2244 if (bs
->io_limits_enabled
) {
2245 bdrv_io_limits_intercept(bs
, true, nb_sectors
);
2248 if (bs
->copy_on_read_in_flight
) {
2249 wait_for_overlapping_requests(bs
, sector_num
, nb_sectors
);
2252 tracked_request_begin(&req
, bs
, sector_num
, nb_sectors
, true);
2254 if (flags
& BDRV_REQ_ZERO_WRITE
) {
2255 ret
= bdrv_co_do_write_zeroes(bs
, sector_num
, nb_sectors
);
2257 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, qiov
);
2260 if (ret
== 0 && !bs
->enable_write_cache
) {
2261 ret
= bdrv_co_flush(bs
);
2264 if (bs
->dirty_bitmap
) {
2265 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
2268 if (bs
->wr_highest_sector
< sector_num
+ nb_sectors
- 1) {
2269 bs
->wr_highest_sector
= sector_num
+ nb_sectors
- 1;
2272 tracked_request_end(&req
);
2277 int coroutine_fn
bdrv_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
2278 int nb_sectors
, QEMUIOVector
*qiov
)
2280 trace_bdrv_co_writev(bs
, sector_num
, nb_sectors
);
2282 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, qiov
, 0);
2285 int coroutine_fn
bdrv_co_write_zeroes(BlockDriverState
*bs
,
2286 int64_t sector_num
, int nb_sectors
)
2288 trace_bdrv_co_write_zeroes(bs
, sector_num
, nb_sectors
);
2290 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, NULL
,
2291 BDRV_REQ_ZERO_WRITE
);
2295 * Truncate file to 'offset' bytes (needed only for file protocols)
2297 int bdrv_truncate(BlockDriverState
*bs
, int64_t offset
)
2299 BlockDriver
*drv
= bs
->drv
;
2303 if (!drv
->bdrv_truncate
)
2307 if (bdrv_in_use(bs
))
2309 ret
= drv
->bdrv_truncate(bs
, offset
);
2311 ret
= refresh_total_sectors(bs
, offset
>> BDRV_SECTOR_BITS
);
2312 bdrv_dev_resize_cb(bs
);
2318 * Length of an allocated file in bytes. Sparse files are counted by actual
2319 * allocated space. Return < 0 if error or unknown.
2321 int64_t bdrv_get_allocated_file_size(BlockDriverState
*bs
)
2323 BlockDriver
*drv
= bs
->drv
;
2327 if (drv
->bdrv_get_allocated_file_size
) {
2328 return drv
->bdrv_get_allocated_file_size(bs
);
2331 return bdrv_get_allocated_file_size(bs
->file
);
2337 * Length of a file in bytes. Return < 0 if error or unknown.
2339 int64_t bdrv_getlength(BlockDriverState
*bs
)
2341 BlockDriver
*drv
= bs
->drv
;
2345 if (bs
->growable
|| bdrv_dev_has_removable_media(bs
)) {
2346 if (drv
->bdrv_getlength
) {
2347 return drv
->bdrv_getlength(bs
);
2350 return bs
->total_sectors
* BDRV_SECTOR_SIZE
;
2353 /* return 0 as number of sectors if no device present or error */
2354 void bdrv_get_geometry(BlockDriverState
*bs
, uint64_t *nb_sectors_ptr
)
2357 length
= bdrv_getlength(bs
);
2361 length
= length
>> BDRV_SECTOR_BITS
;
2362 *nb_sectors_ptr
= length
;
2365 /* throttling disk io limits */
2366 void bdrv_set_io_limits(BlockDriverState
*bs
,
2367 BlockIOLimit
*io_limits
)
2369 bs
->io_limits
= *io_limits
;
2370 bs
->io_limits_enabled
= bdrv_io_limits_enabled(bs
);
2373 void bdrv_set_on_error(BlockDriverState
*bs
, BlockErrorAction on_read_error
,
2374 BlockErrorAction on_write_error
)
2376 bs
->on_read_error
= on_read_error
;
2377 bs
->on_write_error
= on_write_error
;
2380 BlockErrorAction
bdrv_get_on_error(BlockDriverState
*bs
, int is_read
)
2382 return is_read
? bs
->on_read_error
: bs
->on_write_error
;
2385 int bdrv_is_read_only(BlockDriverState
*bs
)
2387 return bs
->read_only
;
2390 int bdrv_is_sg(BlockDriverState
*bs
)
2395 int bdrv_enable_write_cache(BlockDriverState
*bs
)
2397 return bs
->enable_write_cache
;
2400 void bdrv_set_enable_write_cache(BlockDriverState
*bs
, bool wce
)
2402 bs
->enable_write_cache
= wce
;
2404 /* so a reopen() will preserve wce */
2406 bs
->open_flags
|= BDRV_O_CACHE_WB
;
2408 bs
->open_flags
&= ~BDRV_O_CACHE_WB
;
2412 int bdrv_is_encrypted(BlockDriverState
*bs
)
2414 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
2416 return bs
->encrypted
;
2419 int bdrv_key_required(BlockDriverState
*bs
)
2421 BlockDriverState
*backing_hd
= bs
->backing_hd
;
2423 if (backing_hd
&& backing_hd
->encrypted
&& !backing_hd
->valid_key
)
2425 return (bs
->encrypted
&& !bs
->valid_key
);
2428 int bdrv_set_key(BlockDriverState
*bs
, const char *key
)
2431 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
) {
2432 ret
= bdrv_set_key(bs
->backing_hd
, key
);
2438 if (!bs
->encrypted
) {
2440 } else if (!bs
->drv
|| !bs
->drv
->bdrv_set_key
) {
2443 ret
= bs
->drv
->bdrv_set_key(bs
, key
);
2446 } else if (!bs
->valid_key
) {
2448 /* call the change callback now, we skipped it on open */
2449 bdrv_dev_change_media_cb(bs
, true);
2454 const char *bdrv_get_format_name(BlockDriverState
*bs
)
2456 return bs
->drv
? bs
->drv
->format_name
: NULL
;
2459 void bdrv_iterate_format(void (*it
)(void *opaque
, const char *name
),
2464 QLIST_FOREACH(drv
, &bdrv_drivers
, list
) {
2465 it(opaque
, drv
->format_name
);
2469 BlockDriverState
*bdrv_find(const char *name
)
2471 BlockDriverState
*bs
;
2473 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2474 if (!strcmp(name
, bs
->device_name
)) {
2481 BlockDriverState
*bdrv_next(BlockDriverState
*bs
)
2484 return QTAILQ_FIRST(&bdrv_states
);
2486 return QTAILQ_NEXT(bs
, list
);
2489 void bdrv_iterate(void (*it
)(void *opaque
, BlockDriverState
*bs
), void *opaque
)
2491 BlockDriverState
*bs
;
2493 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2498 const char *bdrv_get_device_name(BlockDriverState
*bs
)
2500 return bs
->device_name
;
2503 int bdrv_get_flags(BlockDriverState
*bs
)
2505 return bs
->open_flags
;
2508 void bdrv_flush_all(void)
2510 BlockDriverState
*bs
;
2512 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2517 int bdrv_has_zero_init(BlockDriverState
*bs
)
2521 if (bs
->drv
->bdrv_has_zero_init
) {
2522 return bs
->drv
->bdrv_has_zero_init(bs
);
2528 typedef struct BdrvCoIsAllocatedData
{
2529 BlockDriverState
*bs
;
2535 } BdrvCoIsAllocatedData
;
2538 * Returns true iff the specified sector is present in the disk image. Drivers
2539 * not implementing the functionality are assumed to not support backing files,
2540 * hence all their sectors are reported as allocated.
2542 * If 'sector_num' is beyond the end of the disk image the return value is 0
2543 * and 'pnum' is set to 0.
2545 * 'pnum' is set to the number of sectors (including and immediately following
2546 * the specified sector) that are known to be in the same
2547 * allocated/unallocated state.
2549 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2550 * beyond the end of the disk image it will be clamped.
2552 int coroutine_fn
bdrv_co_is_allocated(BlockDriverState
*bs
, int64_t sector_num
,
2553 int nb_sectors
, int *pnum
)
2557 if (sector_num
>= bs
->total_sectors
) {
2562 n
= bs
->total_sectors
- sector_num
;
2563 if (n
< nb_sectors
) {
2567 if (!bs
->drv
->bdrv_co_is_allocated
) {
2572 return bs
->drv
->bdrv_co_is_allocated(bs
, sector_num
, nb_sectors
, pnum
);
2575 /* Coroutine wrapper for bdrv_is_allocated() */
2576 static void coroutine_fn
bdrv_is_allocated_co_entry(void *opaque
)
2578 BdrvCoIsAllocatedData
*data
= opaque
;
2579 BlockDriverState
*bs
= data
->bs
;
2581 data
->ret
= bdrv_co_is_allocated(bs
, data
->sector_num
, data
->nb_sectors
,
2587 * Synchronous wrapper around bdrv_co_is_allocated().
2589 * See bdrv_co_is_allocated() for details.
2591 int bdrv_is_allocated(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
,
2595 BdrvCoIsAllocatedData data
= {
2597 .sector_num
= sector_num
,
2598 .nb_sectors
= nb_sectors
,
2603 co
= qemu_coroutine_create(bdrv_is_allocated_co_entry
);
2604 qemu_coroutine_enter(co
, &data
);
2605 while (!data
.done
) {
2612 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
2614 * Return true if the given sector is allocated in any image between
2615 * BASE and TOP (inclusive). BASE can be NULL to check if the given
2616 * sector is allocated in any image of the chain. Return false otherwise.
2618 * 'pnum' is set to the number of sectors (including and immediately following
2619 * the specified sector) that are known to be in the same
2620 * allocated/unallocated state.
2623 int coroutine_fn
bdrv_co_is_allocated_above(BlockDriverState
*top
,
2624 BlockDriverState
*base
,
2626 int nb_sectors
, int *pnum
)
2628 BlockDriverState
*intermediate
;
2629 int ret
, n
= nb_sectors
;
2632 while (intermediate
&& intermediate
!= base
) {
2634 ret
= bdrv_co_is_allocated(intermediate
, sector_num
, nb_sectors
,
2644 * [sector_num, nb_sectors] is unallocated on top but intermediate
2647 * [sector_num+x, nb_sectors] allocated.
2649 if (n
> pnum_inter
) {
2653 intermediate
= intermediate
->backing_hd
;
2660 BlockInfoList
*qmp_query_block(Error
**errp
)
2662 BlockInfoList
*head
= NULL
, *cur_item
= NULL
;
2663 BlockDriverState
*bs
;
2665 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2666 BlockInfoList
*info
= g_malloc0(sizeof(*info
));
2668 info
->value
= g_malloc0(sizeof(*info
->value
));
2669 info
->value
->device
= g_strdup(bs
->device_name
);
2670 info
->value
->type
= g_strdup("unknown");
2671 info
->value
->locked
= bdrv_dev_is_medium_locked(bs
);
2672 info
->value
->removable
= bdrv_dev_has_removable_media(bs
);
2674 if (bdrv_dev_has_removable_media(bs
)) {
2675 info
->value
->has_tray_open
= true;
2676 info
->value
->tray_open
= bdrv_dev_is_tray_open(bs
);
2679 if (bdrv_iostatus_is_enabled(bs
)) {
2680 info
->value
->has_io_status
= true;
2681 info
->value
->io_status
= bs
->iostatus
;
2685 info
->value
->has_inserted
= true;
2686 info
->value
->inserted
= g_malloc0(sizeof(*info
->value
->inserted
));
2687 info
->value
->inserted
->file
= g_strdup(bs
->filename
);
2688 info
->value
->inserted
->ro
= bs
->read_only
;
2689 info
->value
->inserted
->drv
= g_strdup(bs
->drv
->format_name
);
2690 info
->value
->inserted
->encrypted
= bs
->encrypted
;
2691 info
->value
->inserted
->encryption_key_missing
= bdrv_key_required(bs
);
2692 if (bs
->backing_file
[0]) {
2693 info
->value
->inserted
->has_backing_file
= true;
2694 info
->value
->inserted
->backing_file
= g_strdup(bs
->backing_file
);
2697 info
->value
->inserted
->backing_file_depth
=
2698 bdrv_get_backing_file_depth(bs
);
2700 if (bs
->io_limits_enabled
) {
2701 info
->value
->inserted
->bps
=
2702 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
];
2703 info
->value
->inserted
->bps_rd
=
2704 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_READ
];
2705 info
->value
->inserted
->bps_wr
=
2706 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_WRITE
];
2707 info
->value
->inserted
->iops
=
2708 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
];
2709 info
->value
->inserted
->iops_rd
=
2710 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_READ
];
2711 info
->value
->inserted
->iops_wr
=
2712 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_WRITE
];
2716 /* XXX: waiting for the qapi to support GSList */
2718 head
= cur_item
= info
;
2720 cur_item
->next
= info
;
2728 /* Consider exposing this as a full-fledged QMP command */
2729 static BlockStats
*qmp_query_blockstat(const BlockDriverState
*bs
, Error
**errp
)
2733 s
= g_malloc0(sizeof(*s
));
2735 if (bs
->device_name
[0]) {
2736 s
->has_device
= true;
2737 s
->device
= g_strdup(bs
->device_name
);
2740 s
->stats
= g_malloc0(sizeof(*s
->stats
));
2741 s
->stats
->rd_bytes
= bs
->nr_bytes
[BDRV_ACCT_READ
];
2742 s
->stats
->wr_bytes
= bs
->nr_bytes
[BDRV_ACCT_WRITE
];
2743 s
->stats
->rd_operations
= bs
->nr_ops
[BDRV_ACCT_READ
];
2744 s
->stats
->wr_operations
= bs
->nr_ops
[BDRV_ACCT_WRITE
];
2745 s
->stats
->wr_highest_offset
= bs
->wr_highest_sector
* BDRV_SECTOR_SIZE
;
2746 s
->stats
->flush_operations
= bs
->nr_ops
[BDRV_ACCT_FLUSH
];
2747 s
->stats
->wr_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_WRITE
];
2748 s
->stats
->rd_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_READ
];
2749 s
->stats
->flush_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_FLUSH
];
2752 s
->has_parent
= true;
2753 s
->parent
= qmp_query_blockstat(bs
->file
, NULL
);
2759 BlockStatsList
*qmp_query_blockstats(Error
**errp
)
2761 BlockStatsList
*head
= NULL
, *cur_item
= NULL
;
2762 BlockDriverState
*bs
;
2764 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2765 BlockStatsList
*info
= g_malloc0(sizeof(*info
));
2766 info
->value
= qmp_query_blockstat(bs
, NULL
);
2768 /* XXX: waiting for the qapi to support GSList */
2770 head
= cur_item
= info
;
2772 cur_item
->next
= info
;
2780 const char *bdrv_get_encrypted_filename(BlockDriverState
*bs
)
2782 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
2783 return bs
->backing_file
;
2784 else if (bs
->encrypted
)
2785 return bs
->filename
;
2790 void bdrv_get_backing_filename(BlockDriverState
*bs
,
2791 char *filename
, int filename_size
)
2793 pstrcpy(filename
, filename_size
, bs
->backing_file
);
2796 int bdrv_write_compressed(BlockDriverState
*bs
, int64_t sector_num
,
2797 const uint8_t *buf
, int nb_sectors
)
2799 BlockDriver
*drv
= bs
->drv
;
2802 if (!drv
->bdrv_write_compressed
)
2804 if (bdrv_check_request(bs
, sector_num
, nb_sectors
))
2807 if (bs
->dirty_bitmap
) {
2808 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
2811 return drv
->bdrv_write_compressed(bs
, sector_num
, buf
, nb_sectors
);
2814 int bdrv_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
2816 BlockDriver
*drv
= bs
->drv
;
2819 if (!drv
->bdrv_get_info
)
2821 memset(bdi
, 0, sizeof(*bdi
));
2822 return drv
->bdrv_get_info(bs
, bdi
);
2825 int bdrv_save_vmstate(BlockDriverState
*bs
, const uint8_t *buf
,
2826 int64_t pos
, int size
)
2828 BlockDriver
*drv
= bs
->drv
;
2831 if (drv
->bdrv_save_vmstate
)
2832 return drv
->bdrv_save_vmstate(bs
, buf
, pos
, size
);
2834 return bdrv_save_vmstate(bs
->file
, buf
, pos
, size
);
2838 int bdrv_load_vmstate(BlockDriverState
*bs
, uint8_t *buf
,
2839 int64_t pos
, int size
)
2841 BlockDriver
*drv
= bs
->drv
;
2844 if (drv
->bdrv_load_vmstate
)
2845 return drv
->bdrv_load_vmstate(bs
, buf
, pos
, size
);
2847 return bdrv_load_vmstate(bs
->file
, buf
, pos
, size
);
2851 void bdrv_debug_event(BlockDriverState
*bs
, BlkDebugEvent event
)
2853 BlockDriver
*drv
= bs
->drv
;
2855 if (!drv
|| !drv
->bdrv_debug_event
) {
2859 drv
->bdrv_debug_event(bs
, event
);
2863 /**************************************************************/
2864 /* handling of snapshots */
2866 int bdrv_can_snapshot(BlockDriverState
*bs
)
2868 BlockDriver
*drv
= bs
->drv
;
2869 if (!drv
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
2873 if (!drv
->bdrv_snapshot_create
) {
2874 if (bs
->file
!= NULL
) {
2875 return bdrv_can_snapshot(bs
->file
);
2883 int bdrv_is_snapshot(BlockDriverState
*bs
)
2885 return !!(bs
->open_flags
& BDRV_O_SNAPSHOT
);
2888 BlockDriverState
*bdrv_snapshots(void)
2890 BlockDriverState
*bs
;
2893 return bs_snapshots
;
2897 while ((bs
= bdrv_next(bs
))) {
2898 if (bdrv_can_snapshot(bs
)) {
2906 int bdrv_snapshot_create(BlockDriverState
*bs
,
2907 QEMUSnapshotInfo
*sn_info
)
2909 BlockDriver
*drv
= bs
->drv
;
2912 if (drv
->bdrv_snapshot_create
)
2913 return drv
->bdrv_snapshot_create(bs
, sn_info
);
2915 return bdrv_snapshot_create(bs
->file
, sn_info
);
2919 int bdrv_snapshot_goto(BlockDriverState
*bs
,
2920 const char *snapshot_id
)
2922 BlockDriver
*drv
= bs
->drv
;
2927 if (drv
->bdrv_snapshot_goto
)
2928 return drv
->bdrv_snapshot_goto(bs
, snapshot_id
);
2931 drv
->bdrv_close(bs
);
2932 ret
= bdrv_snapshot_goto(bs
->file
, snapshot_id
);
2933 open_ret
= drv
->bdrv_open(bs
, bs
->open_flags
);
2935 bdrv_delete(bs
->file
);
2945 int bdrv_snapshot_delete(BlockDriverState
*bs
, const char *snapshot_id
)
2947 BlockDriver
*drv
= bs
->drv
;
2950 if (drv
->bdrv_snapshot_delete
)
2951 return drv
->bdrv_snapshot_delete(bs
, snapshot_id
);
2953 return bdrv_snapshot_delete(bs
->file
, snapshot_id
);
2957 int bdrv_snapshot_list(BlockDriverState
*bs
,
2958 QEMUSnapshotInfo
**psn_info
)
2960 BlockDriver
*drv
= bs
->drv
;
2963 if (drv
->bdrv_snapshot_list
)
2964 return drv
->bdrv_snapshot_list(bs
, psn_info
);
2966 return bdrv_snapshot_list(bs
->file
, psn_info
);
2970 int bdrv_snapshot_load_tmp(BlockDriverState
*bs
,
2971 const char *snapshot_name
)
2973 BlockDriver
*drv
= bs
->drv
;
2977 if (!bs
->read_only
) {
2980 if (drv
->bdrv_snapshot_load_tmp
) {
2981 return drv
->bdrv_snapshot_load_tmp(bs
, snapshot_name
);
2986 BlockDriverState
*bdrv_find_backing_image(BlockDriverState
*bs
,
2987 const char *backing_file
)
2993 if (bs
->backing_hd
) {
2994 if (strcmp(bs
->backing_file
, backing_file
) == 0) {
2995 return bs
->backing_hd
;
2997 return bdrv_find_backing_image(bs
->backing_hd
, backing_file
);
3004 int bdrv_get_backing_file_depth(BlockDriverState
*bs
)
3010 if (!bs
->backing_hd
) {
3014 return 1 + bdrv_get_backing_file_depth(bs
->backing_hd
);
#define NB_SUFFIXES 4

/*
 * Format @size into @buf (capacity @buf_size) in a short human-readable
 * form and return @buf.
 *
 * Values up to 999 are printed as plain integers.  Larger values are
 * scaled by powers of 1024 and suffixed with K, M, G or T: one decimal
 * is shown while the scaled value is below 10, otherwise the value is
 * rounded to the nearest integer.  T is used for everything beyond the
 * G range.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            break;
        }
        /* The last suffix absorbs every remaining magnitude. */
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            break;
        }
    }
    return buf;
}
3047 char *bdrv_snapshot_dump(char *buf
, int buf_size
, QEMUSnapshotInfo
*sn
)
3049 char buf1
[128], date_buf
[128], clock_buf
[128];
3059 snprintf(buf
, buf_size
,
3060 "%-10s%-20s%7s%20s%15s",
3061 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
3065 ptm
= localtime(&ti
);
3066 strftime(date_buf
, sizeof(date_buf
),
3067 "%Y-%m-%d %H:%M:%S", ptm
);
3069 localtime_r(&ti
, &tm
);
3070 strftime(date_buf
, sizeof(date_buf
),
3071 "%Y-%m-%d %H:%M:%S", &tm
);
3073 secs
= sn
->vm_clock_nsec
/ 1000000000;
3074 snprintf(clock_buf
, sizeof(clock_buf
),
3075 "%02d:%02d:%02d.%03d",
3077 (int)((secs
/ 60) % 60),
3079 (int)((sn
->vm_clock_nsec
/ 1000000) % 1000));
3080 snprintf(buf
, buf_size
,
3081 "%-10s%-20s%7s%20s%15s",
3082 sn
->id_str
, sn
->name
,
3083 get_human_readable_size(buf1
, sizeof(buf1
), sn
->vm_state_size
),
3090 /**************************************************************/
3093 BlockDriverAIOCB
*bdrv_aio_readv(BlockDriverState
*bs
, int64_t sector_num
,
3094 QEMUIOVector
*qiov
, int nb_sectors
,
3095 BlockDriverCompletionFunc
*cb
, void *opaque
)
3097 trace_bdrv_aio_readv(bs
, sector_num
, nb_sectors
, opaque
);
3099 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
3103 BlockDriverAIOCB
*bdrv_aio_writev(BlockDriverState
*bs
, int64_t sector_num
,
3104 QEMUIOVector
*qiov
, int nb_sectors
,
3105 BlockDriverCompletionFunc
*cb
, void *opaque
)
3107 trace_bdrv_aio_writev(bs
, sector_num
, nb_sectors
, opaque
);
3109 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
3114 typedef struct MultiwriteCB
{
3119 BlockDriverCompletionFunc
*cb
;
3121 QEMUIOVector
*free_qiov
;
3125 static void multiwrite_user_cb(MultiwriteCB
*mcb
)
3129 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
3130 mcb
->callbacks
[i
].cb(mcb
->callbacks
[i
].opaque
, mcb
->error
);
3131 if (mcb
->callbacks
[i
].free_qiov
) {
3132 qemu_iovec_destroy(mcb
->callbacks
[i
].free_qiov
);
3134 g_free(mcb
->callbacks
[i
].free_qiov
);
3138 static void multiwrite_cb(void *opaque
, int ret
)
3140 MultiwriteCB
*mcb
= opaque
;
3142 trace_multiwrite_cb(mcb
, ret
);
3144 if (ret
< 0 && !mcb
->error
) {
3148 mcb
->num_requests
--;
3149 if (mcb
->num_requests
== 0) {
3150 multiwrite_user_cb(mcb
);
3155 static int multiwrite_req_compare(const void *a
, const void *b
)
3157 const BlockRequest
*req1
= a
, *req2
= b
;
3160 * Note that we can't simply subtract req2->sector from req1->sector
3161 * here as that could overflow the return value.
3163 if (req1
->sector
> req2
->sector
) {
3165 } else if (req1
->sector
< req2
->sector
) {
3173 * Takes a bunch of requests and tries to merge them. Returns the number of
3174 * requests that remain after merging.
3176 static int multiwrite_merge(BlockDriverState
*bs
, BlockRequest
*reqs
,
3177 int num_reqs
, MultiwriteCB
*mcb
)
3181 // Sort requests by start sector
3182 qsort(reqs
, num_reqs
, sizeof(*reqs
), &multiwrite_req_compare
);
3184 // Check if adjacent requests touch the same clusters. If so, combine them,
3185 // filling up gaps with zero sectors.
3187 for (i
= 1; i
< num_reqs
; i
++) {
3189 int64_t oldreq_last
= reqs
[outidx
].sector
+ reqs
[outidx
].nb_sectors
;
3191 // Handle exactly sequential writes and overlapping writes.
3192 if (reqs
[i
].sector
<= oldreq_last
) {
3196 if (reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1 > IOV_MAX
) {
3202 QEMUIOVector
*qiov
= g_malloc0(sizeof(*qiov
));
3203 qemu_iovec_init(qiov
,
3204 reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1);
3206 // Add the first request to the merged one. If the requests are
3207 // overlapping, drop the last sectors of the first request.
3208 size
= (reqs
[i
].sector
- reqs
[outidx
].sector
) << 9;
3209 qemu_iovec_concat(qiov
, reqs
[outidx
].qiov
, 0, size
);
3211 // We should need to add any zeros between the two requests
3212 assert (reqs
[i
].sector
<= oldreq_last
);
3214 // Add the second request
3215 qemu_iovec_concat(qiov
, reqs
[i
].qiov
, 0, reqs
[i
].qiov
->size
);
3217 reqs
[outidx
].nb_sectors
= qiov
->size
>> 9;
3218 reqs
[outidx
].qiov
= qiov
;
3220 mcb
->callbacks
[i
].free_qiov
= reqs
[outidx
].qiov
;
3223 reqs
[outidx
].sector
= reqs
[i
].sector
;
3224 reqs
[outidx
].nb_sectors
= reqs
[i
].nb_sectors
;
3225 reqs
[outidx
].qiov
= reqs
[i
].qiov
;
3233 * Submit multiple AIO write requests at once.
3235 * On success, the function returns 0 and all requests in the reqs array have
3236 * been submitted. In error case this function returns -1, and any of the
3237 * requests may or may not be submitted yet. In particular, this means that the
3238 * callback will be called for some of the requests, for others it won't. The
3239 * caller must check the error field of the BlockRequest to wait for the right
3240 * callbacks (if error != 0, no callback will be called).
3242 * The implementation may modify the contents of the reqs array, e.g. to merge
3243 * requests. However, the fields opaque and error are left unmodified as they
3244 * are used to signal failure for a single request to the caller.
3246 int bdrv_aio_multiwrite(BlockDriverState
*bs
, BlockRequest
*reqs
, int num_reqs
)
3251 /* don't submit writes if we don't have a medium */
3252 if (bs
->drv
== NULL
) {
3253 for (i
= 0; i
< num_reqs
; i
++) {
3254 reqs
[i
].error
= -ENOMEDIUM
;
3259 if (num_reqs
== 0) {
3263 // Create MultiwriteCB structure
3264 mcb
= g_malloc0(sizeof(*mcb
) + num_reqs
* sizeof(*mcb
->callbacks
));
3265 mcb
->num_requests
= 0;
3266 mcb
->num_callbacks
= num_reqs
;
3268 for (i
= 0; i
< num_reqs
; i
++) {
3269 mcb
->callbacks
[i
].cb
= reqs
[i
].cb
;
3270 mcb
->callbacks
[i
].opaque
= reqs
[i
].opaque
;
3273 // Check for mergable requests
3274 num_reqs
= multiwrite_merge(bs
, reqs
, num_reqs
, mcb
);
3276 trace_bdrv_aio_multiwrite(mcb
, mcb
->num_callbacks
, num_reqs
);
3278 /* Run the aio requests. */
3279 mcb
->num_requests
= num_reqs
;
3280 for (i
= 0; i
< num_reqs
; i
++) {
3281 bdrv_aio_writev(bs
, reqs
[i
].sector
, reqs
[i
].qiov
,
3282 reqs
[i
].nb_sectors
, multiwrite_cb
, mcb
);
3288 void bdrv_aio_cancel(BlockDriverAIOCB
*acb
)
3290 acb
->pool
->cancel(acb
);
3293 /* block I/O throttling */
3294 static bool bdrv_exceed_bps_limits(BlockDriverState
*bs
, int nb_sectors
,
3295 bool is_write
, double elapsed_time
, uint64_t *wait
)
3297 uint64_t bps_limit
= 0;
3298 double bytes_limit
, bytes_base
, bytes_res
;
3299 double slice_time
, wait_time
;
3301 if (bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
]) {
3302 bps_limit
= bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
];
3303 } else if (bs
->io_limits
.bps
[is_write
]) {
3304 bps_limit
= bs
->io_limits
.bps
[is_write
];
3313 slice_time
= bs
->slice_end
- bs
->slice_start
;
3314 slice_time
/= (NANOSECONDS_PER_SECOND
);
3315 bytes_limit
= bps_limit
* slice_time
;
3316 bytes_base
= bs
->nr_bytes
[is_write
] - bs
->io_base
.bytes
[is_write
];
3317 if (bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
]) {
3318 bytes_base
+= bs
->nr_bytes
[!is_write
] - bs
->io_base
.bytes
[!is_write
];
3321 /* bytes_base: the bytes of data which have been read/written; and
3322 * it is obtained from the history statistic info.
3323 * bytes_res: the remaining bytes of data which need to be read/written.
3324 * (bytes_base + bytes_res) / bps_limit: used to calcuate
3325 * the total time for completing reading/writting all data.
3327 bytes_res
= (unsigned) nb_sectors
* BDRV_SECTOR_SIZE
;
3329 if (bytes_base
+ bytes_res
<= bytes_limit
) {
3337 /* Calc approx time to dispatch */
3338 wait_time
= (bytes_base
+ bytes_res
) / bps_limit
- elapsed_time
;
3340 /* When the I/O rate at runtime exceeds the limits,
3341 * bs->slice_end need to be extended in order that the current statistic
3342 * info can be kept until the timer fire, so it is increased and tuned
3343 * based on the result of experiment.
3345 bs
->slice_time
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3346 bs
->slice_end
+= bs
->slice_time
- 3 * BLOCK_IO_SLICE_TIME
;
3348 *wait
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3354 static bool bdrv_exceed_iops_limits(BlockDriverState
*bs
, bool is_write
,
3355 double elapsed_time
, uint64_t *wait
)
3357 uint64_t iops_limit
= 0;
3358 double ios_limit
, ios_base
;
3359 double slice_time
, wait_time
;
3361 if (bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
]) {
3362 iops_limit
= bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
];
3363 } else if (bs
->io_limits
.iops
[is_write
]) {
3364 iops_limit
= bs
->io_limits
.iops
[is_write
];
3373 slice_time
= bs
->slice_end
- bs
->slice_start
;
3374 slice_time
/= (NANOSECONDS_PER_SECOND
);
3375 ios_limit
= iops_limit
* slice_time
;
3376 ios_base
= bs
->nr_ops
[is_write
] - bs
->io_base
.ios
[is_write
];
3377 if (bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
]) {
3378 ios_base
+= bs
->nr_ops
[!is_write
] - bs
->io_base
.ios
[!is_write
];
3381 if (ios_base
+ 1 <= ios_limit
) {
3389 /* Calc approx time to dispatch */
3390 wait_time
= (ios_base
+ 1) / iops_limit
;
3391 if (wait_time
> elapsed_time
) {
3392 wait_time
= wait_time
- elapsed_time
;
3397 bs
->slice_time
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3398 bs
->slice_end
+= bs
->slice_time
- 3 * BLOCK_IO_SLICE_TIME
;
3400 *wait
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3406 static bool bdrv_exceed_io_limits(BlockDriverState
*bs
, int nb_sectors
,
3407 bool is_write
, int64_t *wait
)
3409 int64_t now
, max_wait
;
3410 uint64_t bps_wait
= 0, iops_wait
= 0;
3411 double elapsed_time
;
3412 int bps_ret
, iops_ret
;
3414 now
= qemu_get_clock_ns(vm_clock
);
3415 if ((bs
->slice_start
< now
)
3416 && (bs
->slice_end
> now
)) {
3417 bs
->slice_end
= now
+ bs
->slice_time
;
3419 bs
->slice_time
= 5 * BLOCK_IO_SLICE_TIME
;
3420 bs
->slice_start
= now
;
3421 bs
->slice_end
= now
+ bs
->slice_time
;
3423 bs
->io_base
.bytes
[is_write
] = bs
->nr_bytes
[is_write
];
3424 bs
->io_base
.bytes
[!is_write
] = bs
->nr_bytes
[!is_write
];
3426 bs
->io_base
.ios
[is_write
] = bs
->nr_ops
[is_write
];
3427 bs
->io_base
.ios
[!is_write
] = bs
->nr_ops
[!is_write
];
3430 elapsed_time
= now
- bs
->slice_start
;
3431 elapsed_time
/= (NANOSECONDS_PER_SECOND
);
3433 bps_ret
= bdrv_exceed_bps_limits(bs
, nb_sectors
,
3434 is_write
, elapsed_time
, &bps_wait
);
3435 iops_ret
= bdrv_exceed_iops_limits(bs
, is_write
,
3436 elapsed_time
, &iops_wait
);
3437 if (bps_ret
|| iops_ret
) {
3438 max_wait
= bps_wait
> iops_wait
? bps_wait
: iops_wait
;
3443 now
= qemu_get_clock_ns(vm_clock
);
3444 if (bs
->slice_end
< now
+ max_wait
) {
3445 bs
->slice_end
= now
+ max_wait
;
3458 /**************************************************************/
3459 /* async block device emulation */
3461 typedef struct BlockDriverAIOCBSync
{
3462 BlockDriverAIOCB common
;
3465 /* vector translation state */
3469 } BlockDriverAIOCBSync
;
3471 static void bdrv_aio_cancel_em(BlockDriverAIOCB
*blockacb
)
3473 BlockDriverAIOCBSync
*acb
=
3474 container_of(blockacb
, BlockDriverAIOCBSync
, common
);
3475 qemu_bh_delete(acb
->bh
);
3477 qemu_aio_release(acb
);
3480 static AIOPool bdrv_em_aio_pool
= {
3481 .aiocb_size
= sizeof(BlockDriverAIOCBSync
),
3482 .cancel
= bdrv_aio_cancel_em
,
3485 static void bdrv_aio_bh_cb(void *opaque
)
3487 BlockDriverAIOCBSync
*acb
= opaque
;
3490 qemu_iovec_from_buf(acb
->qiov
, 0, acb
->bounce
, acb
->qiov
->size
);
3491 qemu_vfree(acb
->bounce
);
3492 acb
->common
.cb(acb
->common
.opaque
, acb
->ret
);
3493 qemu_bh_delete(acb
->bh
);
3495 qemu_aio_release(acb
);
3498 static BlockDriverAIOCB
*bdrv_aio_rw_vector(BlockDriverState
*bs
,
3502 BlockDriverCompletionFunc
*cb
,
3507 BlockDriverAIOCBSync
*acb
;
3509 acb
= qemu_aio_get(&bdrv_em_aio_pool
, bs
, cb
, opaque
);
3510 acb
->is_write
= is_write
;
3512 acb
->bounce
= qemu_blockalign(bs
, qiov
->size
);
3513 acb
->bh
= qemu_bh_new(bdrv_aio_bh_cb
, acb
);
3516 qemu_iovec_to_buf(acb
->qiov
, 0, acb
->bounce
, qiov
->size
);
3517 acb
->ret
= bs
->drv
->bdrv_write(bs
, sector_num
, acb
->bounce
, nb_sectors
);
3519 acb
->ret
= bs
->drv
->bdrv_read(bs
, sector_num
, acb
->bounce
, nb_sectors
);
3522 qemu_bh_schedule(acb
->bh
);
3524 return &acb
->common
;
3527 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
3528 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
3529 BlockDriverCompletionFunc
*cb
, void *opaque
)
3531 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 0);
3534 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
3535 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
3536 BlockDriverCompletionFunc
*cb
, void *opaque
)
3538 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 1);
3542 typedef struct BlockDriverAIOCBCoroutine
{
3543 BlockDriverAIOCB common
;
3547 } BlockDriverAIOCBCoroutine
;
3549 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB
*blockacb
)
3554 static AIOPool bdrv_em_co_aio_pool
= {
3555 .aiocb_size
= sizeof(BlockDriverAIOCBCoroutine
),
3556 .cancel
= bdrv_aio_co_cancel_em
,
3559 static void bdrv_co_em_bh(void *opaque
)
3561 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3563 acb
->common
.cb(acb
->common
.opaque
, acb
->req
.error
);
3564 qemu_bh_delete(acb
->bh
);
3565 qemu_aio_release(acb
);
3568 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3569 static void coroutine_fn
bdrv_co_do_rw(void *opaque
)
3571 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3572 BlockDriverState
*bs
= acb
->common
.bs
;
3574 if (!acb
->is_write
) {
3575 acb
->req
.error
= bdrv_co_do_readv(bs
, acb
->req
.sector
,
3576 acb
->req
.nb_sectors
, acb
->req
.qiov
, 0);
3578 acb
->req
.error
= bdrv_co_do_writev(bs
, acb
->req
.sector
,
3579 acb
->req
.nb_sectors
, acb
->req
.qiov
, 0);
3582 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3583 qemu_bh_schedule(acb
->bh
);
3586 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
3590 BlockDriverCompletionFunc
*cb
,
3595 BlockDriverAIOCBCoroutine
*acb
;
3597 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3598 acb
->req
.sector
= sector_num
;
3599 acb
->req
.nb_sectors
= nb_sectors
;
3600 acb
->req
.qiov
= qiov
;
3601 acb
->is_write
= is_write
;
3603 co
= qemu_coroutine_create(bdrv_co_do_rw
);
3604 qemu_coroutine_enter(co
, acb
);
3606 return &acb
->common
;
3609 static void coroutine_fn
bdrv_aio_flush_co_entry(void *opaque
)
3611 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3612 BlockDriverState
*bs
= acb
->common
.bs
;
3614 acb
->req
.error
= bdrv_co_flush(bs
);
3615 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3616 qemu_bh_schedule(acb
->bh
);
3619 BlockDriverAIOCB
*bdrv_aio_flush(BlockDriverState
*bs
,
3620 BlockDriverCompletionFunc
*cb
, void *opaque
)
3622 trace_bdrv_aio_flush(bs
, opaque
);
3625 BlockDriverAIOCBCoroutine
*acb
;
3627 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3628 co
= qemu_coroutine_create(bdrv_aio_flush_co_entry
);
3629 qemu_coroutine_enter(co
, acb
);
3631 return &acb
->common
;
3634 static void coroutine_fn
bdrv_aio_discard_co_entry(void *opaque
)
3636 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3637 BlockDriverState
*bs
= acb
->common
.bs
;
3639 acb
->req
.error
= bdrv_co_discard(bs
, acb
->req
.sector
, acb
->req
.nb_sectors
);
3640 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3641 qemu_bh_schedule(acb
->bh
);
3644 BlockDriverAIOCB
*bdrv_aio_discard(BlockDriverState
*bs
,
3645 int64_t sector_num
, int nb_sectors
,
3646 BlockDriverCompletionFunc
*cb
, void *opaque
)
3649 BlockDriverAIOCBCoroutine
*acb
;
3651 trace_bdrv_aio_discard(bs
, sector_num
, nb_sectors
, opaque
);
3653 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3654 acb
->req
.sector
= sector_num
;
3655 acb
->req
.nb_sectors
= nb_sectors
;
3656 co
= qemu_coroutine_create(bdrv_aio_discard_co_entry
);
3657 qemu_coroutine_enter(co
, acb
);
3659 return &acb
->common
;
3662 void bdrv_init(void)
3664 module_call_init(MODULE_INIT_BLOCK
);
3667 void bdrv_init_with_whitelist(void)
3669 use_bdrv_whitelist
= 1;
3673 void *qemu_aio_get(AIOPool
*pool
, BlockDriverState
*bs
,
3674 BlockDriverCompletionFunc
*cb
, void *opaque
)
3676 BlockDriverAIOCB
*acb
;
3678 if (pool
->free_aiocb
) {
3679 acb
= pool
->free_aiocb
;
3680 pool
->free_aiocb
= acb
->next
;
3682 acb
= g_malloc0(pool
->aiocb_size
);
3687 acb
->opaque
= opaque
;
3691 void qemu_aio_release(void *p
)
3693 BlockDriverAIOCB
*acb
= (BlockDriverAIOCB
*)p
;
3694 AIOPool
*pool
= acb
->pool
;
3695 acb
->next
= pool
->free_aiocb
;
3696 pool
->free_aiocb
= acb
;
3699 /**************************************************************/
3700 /* Coroutine block device emulation */
3702 typedef struct CoroutineIOCompletion
{
3703 Coroutine
*coroutine
;
3705 } CoroutineIOCompletion
;
3707 static void bdrv_co_io_em_complete(void *opaque
, int ret
)
3709 CoroutineIOCompletion
*co
= opaque
;
3712 qemu_coroutine_enter(co
->coroutine
, NULL
);
3715 static int coroutine_fn
bdrv_co_io_em(BlockDriverState
*bs
, int64_t sector_num
,
3716 int nb_sectors
, QEMUIOVector
*iov
,
3719 CoroutineIOCompletion co
= {
3720 .coroutine
= qemu_coroutine_self(),
3722 BlockDriverAIOCB
*acb
;
3725 acb
= bs
->drv
->bdrv_aio_writev(bs
, sector_num
, iov
, nb_sectors
,
3726 bdrv_co_io_em_complete
, &co
);
3728 acb
= bs
->drv
->bdrv_aio_readv(bs
, sector_num
, iov
, nb_sectors
,
3729 bdrv_co_io_em_complete
, &co
);
3732 trace_bdrv_co_io_em(bs
, sector_num
, nb_sectors
, is_write
, acb
);
3736 qemu_coroutine_yield();
3741 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
3742 int64_t sector_num
, int nb_sectors
,
3745 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, false);
3748 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
3749 int64_t sector_num
, int nb_sectors
,
3752 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, true);
3755 static void coroutine_fn
bdrv_flush_co_entry(void *opaque
)
3757 RwCo
*rwco
= opaque
;
3759 rwco
->ret
= bdrv_co_flush(rwco
->bs
);
3762 int coroutine_fn
bdrv_co_flush(BlockDriverState
*bs
)
3766 if (!bs
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
3770 /* Write back cached data to the OS even with cache=unsafe */
3771 if (bs
->drv
->bdrv_co_flush_to_os
) {
3772 ret
= bs
->drv
->bdrv_co_flush_to_os(bs
);
3778 /* But don't actually force it to the disk with cache=unsafe */
3779 if (bs
->open_flags
& BDRV_O_NO_FLUSH
) {
3783 if (bs
->drv
->bdrv_co_flush_to_disk
) {
3784 ret
= bs
->drv
->bdrv_co_flush_to_disk(bs
);
3785 } else if (bs
->drv
->bdrv_aio_flush
) {
3786 BlockDriverAIOCB
*acb
;
3787 CoroutineIOCompletion co
= {
3788 .coroutine
= qemu_coroutine_self(),
3791 acb
= bs
->drv
->bdrv_aio_flush(bs
, bdrv_co_io_em_complete
, &co
);
3795 qemu_coroutine_yield();
3800 * Some block drivers always operate in either writethrough or unsafe
3801 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3802 * know how the server works (because the behaviour is hardcoded or
3803 * depends on server-side configuration), so we can't ensure that
3804 * everything is safe on disk. Returning an error doesn't work because
3805 * that would break guests even if the server operates in writethrough
3808 * Let's hope the user knows what he's doing.
3816 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3817 * in the case of cache=unsafe, so there are no useless flushes.
3820 return bdrv_co_flush(bs
->file
);
3823 void bdrv_invalidate_cache(BlockDriverState
*bs
)
3825 if (bs
->drv
&& bs
->drv
->bdrv_invalidate_cache
) {
3826 bs
->drv
->bdrv_invalidate_cache(bs
);
3830 void bdrv_invalidate_cache_all(void)
3832 BlockDriverState
*bs
;
3834 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
3835 bdrv_invalidate_cache(bs
);
3839 void bdrv_clear_incoming_migration_all(void)
3841 BlockDriverState
*bs
;
3843 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
3844 bs
->open_flags
= bs
->open_flags
& ~(BDRV_O_INCOMING
);
3848 int bdrv_flush(BlockDriverState
*bs
)
3856 if (qemu_in_coroutine()) {
3857 /* Fast-path if already in coroutine context */
3858 bdrv_flush_co_entry(&rwco
);
3860 co
= qemu_coroutine_create(bdrv_flush_co_entry
);
3861 qemu_coroutine_enter(co
, &rwco
);
3862 while (rwco
.ret
== NOT_DONE
) {
3870 static void coroutine_fn
bdrv_discard_co_entry(void *opaque
)
3872 RwCo
*rwco
= opaque
;
3874 rwco
->ret
= bdrv_co_discard(rwco
->bs
, rwco
->sector_num
, rwco
->nb_sectors
);
3877 int coroutine_fn
bdrv_co_discard(BlockDriverState
*bs
, int64_t sector_num
,
3882 } else if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
3884 } else if (bs
->read_only
) {
3886 } else if (bs
->drv
->bdrv_co_discard
) {
3887 return bs
->drv
->bdrv_co_discard(bs
, sector_num
, nb_sectors
);
3888 } else if (bs
->drv
->bdrv_aio_discard
) {
3889 BlockDriverAIOCB
*acb
;
3890 CoroutineIOCompletion co
= {
3891 .coroutine
= qemu_coroutine_self(),
3894 acb
= bs
->drv
->bdrv_aio_discard(bs
, sector_num
, nb_sectors
,
3895 bdrv_co_io_em_complete
, &co
);
3899 qemu_coroutine_yield();
3907 int bdrv_discard(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
)
3912 .sector_num
= sector_num
,
3913 .nb_sectors
= nb_sectors
,
3917 if (qemu_in_coroutine()) {
3918 /* Fast-path if already in coroutine context */
3919 bdrv_discard_co_entry(&rwco
);
3921 co
= qemu_coroutine_create(bdrv_discard_co_entry
);
3922 qemu_coroutine_enter(co
, &rwco
);
3923 while (rwco
.ret
== NOT_DONE
) {
3931 /**************************************************************/
3932 /* removable device support */
3935 * Return TRUE if the media is present
3937 int bdrv_is_inserted(BlockDriverState
*bs
)
3939 BlockDriver
*drv
= bs
->drv
;
3943 if (!drv
->bdrv_is_inserted
)
3945 return drv
->bdrv_is_inserted(bs
);
3949 * Return whether the media changed since the last call to this
3950 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3952 int bdrv_media_changed(BlockDriverState
*bs
)
3954 BlockDriver
*drv
= bs
->drv
;
3956 if (drv
&& drv
->bdrv_media_changed
) {
3957 return drv
->bdrv_media_changed(bs
);
3963 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3965 void bdrv_eject(BlockDriverState
*bs
, bool eject_flag
)
3967 BlockDriver
*drv
= bs
->drv
;
3969 if (drv
&& drv
->bdrv_eject
) {
3970 drv
->bdrv_eject(bs
, eject_flag
);
3973 if (bs
->device_name
[0] != '\0') {
3974 bdrv_emit_qmp_eject_event(bs
, eject_flag
);
3979 * Lock or unlock the media (if it is locked, the user won't be able
3980 * to eject it manually).
3982 void bdrv_lock_medium(BlockDriverState
*bs
, bool locked
)
3984 BlockDriver
*drv
= bs
->drv
;
3986 trace_bdrv_lock_medium(bs
, locked
);
3988 if (drv
&& drv
->bdrv_lock_medium
) {
3989 drv
->bdrv_lock_medium(bs
, locked
);
3993 /* needed for generic scsi interface */
3995 int bdrv_ioctl(BlockDriverState
*bs
, unsigned long int req
, void *buf
)
3997 BlockDriver
*drv
= bs
->drv
;
3999 if (drv
&& drv
->bdrv_ioctl
)
4000 return drv
->bdrv_ioctl(bs
, req
, buf
);
4004 BlockDriverAIOCB
*bdrv_aio_ioctl(BlockDriverState
*bs
,
4005 unsigned long int req
, void *buf
,
4006 BlockDriverCompletionFunc
*cb
, void *opaque
)
4008 BlockDriver
*drv
= bs
->drv
;
4010 if (drv
&& drv
->bdrv_aio_ioctl
)
4011 return drv
->bdrv_aio_ioctl(bs
, req
, buf
, cb
, opaque
);
4015 void bdrv_set_buffer_alignment(BlockDriverState
*bs
, int align
)
4017 bs
->buffer_alignment
= align
;
4020 void *qemu_blockalign(BlockDriverState
*bs
, size_t size
)
4022 return qemu_memalign((bs
&& bs
->buffer_alignment
) ? bs
->buffer_alignment
: 512, size
);
4025 void bdrv_set_dirty_tracking(BlockDriverState
*bs
, int enable
)
4027 int64_t bitmap_size
;
4029 bs
->dirty_count
= 0;
4031 if (!bs
->dirty_bitmap
) {
4032 bitmap_size
= (bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
) +
4033 BDRV_SECTORS_PER_DIRTY_CHUNK
* BITS_PER_LONG
- 1;
4034 bitmap_size
/= BDRV_SECTORS_PER_DIRTY_CHUNK
* BITS_PER_LONG
;
4036 bs
->dirty_bitmap
= g_new0(unsigned long, bitmap_size
);
4039 if (bs
->dirty_bitmap
) {
4040 g_free(bs
->dirty_bitmap
);
4041 bs
->dirty_bitmap
= NULL
;
4046 int bdrv_get_dirty(BlockDriverState
*bs
, int64_t sector
)
4048 int64_t chunk
= sector
/ (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK
;
4050 if (bs
->dirty_bitmap
&&
4051 (sector
<< BDRV_SECTOR_BITS
) < bdrv_getlength(bs
)) {
4052 return !!(bs
->dirty_bitmap
[chunk
/ (sizeof(unsigned long) * 8)] &
4053 (1UL << (chunk
% (sizeof(unsigned long) * 8))));
4059 void bdrv_reset_dirty(BlockDriverState
*bs
, int64_t cur_sector
,
4062 set_dirty_bitmap(bs
, cur_sector
, nr_sectors
, 0);
4065 int64_t bdrv_get_dirty_count(BlockDriverState
*bs
)
4067 return bs
->dirty_count
;
4070 void bdrv_set_in_use(BlockDriverState
*bs
, int in_use
)
4072 assert(bs
->in_use
!= in_use
);
4073 bs
->in_use
= in_use
;
4076 int bdrv_in_use(BlockDriverState
*bs
)
4081 void bdrv_iostatus_enable(BlockDriverState
*bs
)
4083 bs
->iostatus_enabled
= true;
4084 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
4087 /* The I/O status is only enabled if the drive explicitly
4088 * enables it _and_ the VM is configured to stop on errors */
4089 bool bdrv_iostatus_is_enabled(const BlockDriverState
*bs
)
4091 return (bs
->iostatus_enabled
&&
4092 (bs
->on_write_error
== BLOCK_ERR_STOP_ENOSPC
||
4093 bs
->on_write_error
== BLOCK_ERR_STOP_ANY
||
4094 bs
->on_read_error
== BLOCK_ERR_STOP_ANY
));
4097 void bdrv_iostatus_disable(BlockDriverState
*bs
)
4099 bs
->iostatus_enabled
= false;
4102 void bdrv_iostatus_reset(BlockDriverState
*bs
)
4104 if (bdrv_iostatus_is_enabled(bs
)) {
4105 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
4109 /* XXX: Today this is set by device models because it makes the implementation
4110 quite simple. However, the block layer knows about the error, so it's
4111 possible to implement this without device models being involved */
4112 void bdrv_iostatus_set_err(BlockDriverState
*bs
, int error
)
4114 if (bdrv_iostatus_is_enabled(bs
) &&
4115 bs
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
4117 bs
->iostatus
= error
== ENOSPC
? BLOCK_DEVICE_IO_STATUS_NOSPACE
:
4118 BLOCK_DEVICE_IO_STATUS_FAILED
;
4123 bdrv_acct_start(BlockDriverState
*bs
, BlockAcctCookie
*cookie
, int64_t bytes
,
4124 enum BlockAcctType type
)
4126 assert(type
< BDRV_MAX_IOTYPE
);
4128 cookie
->bytes
= bytes
;
4129 cookie
->start_time_ns
= get_clock();
4130 cookie
->type
= type
;
4134 bdrv_acct_done(BlockDriverState
*bs
, BlockAcctCookie
*cookie
)
4136 assert(cookie
->type
< BDRV_MAX_IOTYPE
);
4138 bs
->nr_bytes
[cookie
->type
] += cookie
->bytes
;
4139 bs
->nr_ops
[cookie
->type
]++;
4140 bs
->total_time_ns
[cookie
->type
] += get_clock() - cookie
->start_time_ns
;
4143 int bdrv_img_create(const char *filename
, const char *fmt
,
4144 const char *base_filename
, const char *base_fmt
,
4145 char *options
, uint64_t img_size
, int flags
)
4147 QEMUOptionParameter
*param
= NULL
, *create_options
= NULL
;
4148 QEMUOptionParameter
*backing_fmt
, *backing_file
, *size
;
4149 BlockDriverState
*bs
= NULL
;
4150 BlockDriver
*drv
, *proto_drv
;
4151 BlockDriver
*backing_drv
= NULL
;
4154 /* Find driver and parse its options */
4155 drv
= bdrv_find_format(fmt
);
4157 error_report("Unknown file format '%s'", fmt
);
4162 proto_drv
= bdrv_find_protocol(filename
);
4164 error_report("Unknown protocol '%s'", filename
);
4169 create_options
= append_option_parameters(create_options
,
4170 drv
->create_options
);
4171 create_options
= append_option_parameters(create_options
,
4172 proto_drv
->create_options
);
4174 /* Create parameter list with default values */
4175 param
= parse_option_parameters("", create_options
, param
);
4177 set_option_parameter_int(param
, BLOCK_OPT_SIZE
, img_size
);
4179 /* Parse -o options */
4181 param
= parse_option_parameters(options
, create_options
, param
);
4182 if (param
== NULL
) {
4183 error_report("Invalid options for file format '%s'.", fmt
);
4189 if (base_filename
) {
4190 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FILE
,
4192 error_report("Backing file not supported for file format '%s'",
4200 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FMT
, base_fmt
)) {
4201 error_report("Backing file format not supported for file "
4202 "format '%s'", fmt
);
4208 backing_file
= get_option_parameter(param
, BLOCK_OPT_BACKING_FILE
);
4209 if (backing_file
&& backing_file
->value
.s
) {
4210 if (!strcmp(filename
, backing_file
->value
.s
)) {
4211 error_report("Error: Trying to create an image with the "
4212 "same filename as the backing file");
4218 backing_fmt
= get_option_parameter(param
, BLOCK_OPT_BACKING_FMT
);
4219 if (backing_fmt
&& backing_fmt
->value
.s
) {
4220 backing_drv
= bdrv_find_format(backing_fmt
->value
.s
);
4222 error_report("Unknown backing file format '%s'",
4223 backing_fmt
->value
.s
);
4229 // The size for the image must always be specified, with one exception:
4230 // If we are using a backing file, we can obtain the size from there
4231 size
= get_option_parameter(param
, BLOCK_OPT_SIZE
);
4232 if (size
&& size
->value
.n
== -1) {
4233 if (backing_file
&& backing_file
->value
.s
) {
4238 /* backing files always opened read-only */
4240 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
4244 ret
= bdrv_open(bs
, backing_file
->value
.s
, back_flags
, backing_drv
);
4246 error_report("Could not open '%s'", backing_file
->value
.s
);
4249 bdrv_get_geometry(bs
, &size
);
4252 snprintf(buf
, sizeof(buf
), "%" PRId64
, size
);
4253 set_option_parameter(param
, BLOCK_OPT_SIZE
, buf
);
4255 error_report("Image creation needs a size parameter");
4261 printf("Formatting '%s', fmt=%s ", filename
, fmt
);
4262 print_option_parameters(param
);
4265 ret
= bdrv_create(drv
, filename
, param
);
4268 if (ret
== -ENOTSUP
) {
4269 error_report("Formatting or formatting option not supported for "
4270 "file format '%s'", fmt
);
4271 } else if (ret
== -EFBIG
) {
4272 error_report("The image size is too large for file format '%s'",
4275 error_report("%s: error while creating %s: %s", filename
, fmt
,
4281 free_option_parameters(create_options
);
4282 free_option_parameters(param
);
/*
 * block_job_create: set up a new block job of type @job_type on @bs.
 *
 * @job_type: job type descriptor; its instance_size gives the allocation size
 * @bs:       block device the job operates on
 * @speed:    speed value handed to block_job_set_speed()
 * @cb:       completion callback
 * @opaque:   caller data stored in the job
 * @errp:     receives the error on failure
 *
 * Behaviour visible in this extract:
 *  - if bs->job is already set, or bdrv_in_use(bs) is true,
 *    QERR_DEVICE_IN_USE is set on @errp with the device name of @bs;
 *  - @bs is flagged in-use with bdrv_set_in_use(bs, 1);
 *  - the job object is obtained from g_malloc0(job_type->instance_size);
 *  - @job_type and @opaque are stored in the job;
 *  - block_job_set_speed() is called with a local Error; when that error is
 *    set, the in-use flag on @bs is cleared and the error is propagated to
 *    @errp.
 *
 * NOTE(review): the embedded source numbering jumps (e.g. 4293 -> 4297,
 * 4304 -> 4307, 4307 -> 4311, 4316 -> 4319), so the braces, the return
 * statements, the declaration of `job`, whatever is done with @cb and the
 * condition guarding the speed call are not visible here. Confirm against
 * the complete file before relying on this description.
 */
4291 void *block_job_create(const BlockJobType
*job_type
, BlockDriverState
*bs
,
4292 int64_t speed
, BlockDriverCompletionFunc
*cb
,
4293 void *opaque
, Error
**errp
)
/* Refuse a device that already carries a job or is otherwise in use. */
4297 if (bs
->job
|| bdrv_in_use(bs
)) {
4298 error_set(errp
, QERR_DEVICE_IN_USE
, bdrv_get_device_name(bs
));
/* Claim the device for the duration of the job. */
4301 bdrv_set_in_use(bs
, 1);
/* The allocation size comes from the job type, not from a fixed struct. */
4303 job
= g_malloc0(job_type
->instance_size
);
4304 job
->job_type
= job_type
;
4307 job
->opaque
= opaque
;
4311 /* Only set speed when necessary to avoid NotSupported error */
4313 Error
*local_err
= NULL
;
4315 block_job_set_speed(job
, speed
, &local_err
);
4316 if (error_is_set(&local_err
)) {
/* Failure path: release the device again and hand the error to the caller. */
4319 bdrv_set_in_use(bs
, 0);
4320 error_propagate(errp
, local_err
);
/*
 * block_job_complete: finish @job and pass @ret to its completion callback.
 *
 * Takes the BlockDriverState from job->bs, asserts that this device's job
 * pointer is @job, calls job->cb(job->opaque, ret) and then clears the
 * in-use flag on the device with bdrv_set_in_use(bs, 0).
 *
 * NOTE(review): the embedded numbering jumps from 4332 to 4335, so two
 * lines between the callback and bdrv_set_in_use() are missing from this
 * extract, as are the braces. What those lines do (presumably detaching
 * and releasing the job) cannot be confirmed from here.
 */
4327 void block_job_complete(BlockJob
*job
, int ret
)
4329 BlockDriverState
*bs
= job
->bs
;
/* The job being completed must be the one attached to its device. */
4331 assert(bs
->job
== job
);
4332 job
->cb(job
->opaque
, ret
);
4335 bdrv_set_in_use(bs
, 0);
/*
 * block_job_set_speed: forward a speed change for @job to its job type.
 *
 * @job:   job whose speed is being changed
 * @speed: value passed through to the job type's set_speed hook
 * @errp:  receives the error on failure
 *
 * If job->job_type->set_speed is not provided, QERR_NOT_SUPPORTED is set on
 * @errp. The hook is called with @job, @speed and a local Error; when that
 * error is set it is propagated to @errp.
 *
 * NOTE(review): the embedded numbering skips 4344, 4345 and everything
 * after 4348, so the statements following each error report (presumably
 * early returns) and the end of the function body are not visible in this
 * extract. Confirm against the complete file.
 */
4338 void block_job_set_speed(BlockJob
*job
, int64_t speed
, Error
**errp
)
4340 Error
*local_err
= NULL
;
/* Job types without a set_speed hook cannot change speed. */
4342 if (!job
->job_type
->set_speed
) {
4343 error_set(errp
, QERR_NOT_SUPPORTED
);
4346 job
->job_type
->set_speed(job
, speed
, &local_err
);
4347 if (error_is_set(&local_err
)) {
4348 error_propagate(errp
, local_err
);
4355 void block_job_cancel(BlockJob
*job
)
4357 job
->cancelled
= true;
4358 if (job
->co
&& !job
->busy
) {
4359 qemu_coroutine_enter(job
->co
, NULL
);
4363 bool block_job_is_cancelled(BlockJob
*job
)
4365 return job
->cancelled
;
/*
 * State used by block_job_cancel_sync() and block_job_cancel_cb():
 * block_job_cancel_sync() declares one locally and installs its address as
 * the job's opaque pointer; block_job_cancel_cb() receives it back through
 * its opaque argument.
 *
 * NOTE(review): only the `cb` member is visible in this extract (the
 * embedded numbering jumps 4368 -> 4370 -> 4376), and the closing brace is
 * missing too. The two users also access `job`, `opaque`, `cancelled` and
 * `ret`, so those members are presumably declared on the missing lines -
 * confirm against the complete file.
 */
4368 struct BlockCancelData
{
/* Callback that block_job_cancel_cb() chains to as cb(opaque, ret). */
4370 BlockDriverCompletionFunc
*cb
;
/*
 * block_job_cancel_cb: completion callback that block_job_cancel_sync()
 * installs on the job.
 *
 * @opaque: pointer to the struct BlockCancelData set up by
 *          block_job_cancel_sync()
 * @ret:    completion status handed to the callback
 *
 * Records in data->cancelled whether data->job is flagged as cancelled,
 * then chains to data->cb with data->opaque and @ret.
 *
 * NOTE(review): line 4381 is missing from this extract (numbering jumps
 * 4380 -> 4382). block_job_cancel_sync() loops until data.ret leaves
 * -EINPROGRESS, so presumably @ret is stored into data->ret on that line -
 * confirm against the complete file.
 */
4376 static void block_job_cancel_cb(void *opaque
, int ret
)
4378 struct BlockCancelData
*data
= opaque
;
/* Sample the flag before chaining to the saved callback. */
4380 data
->cancelled
= block_job_is_cancelled(data
->job
);
4382 data
->cb(data
->opaque
, ret
);
/*
 * block_job_cancel_sync: cancel @job and return its outcome.
 *
 * Asserts that @job is the job attached to job->bs. Saves the job's current
 * opaque pointer in a local struct BlockCancelData, initialises data.ret to
 * -EINPROGRESS, replaces the job's callback with block_job_cancel_cb and
 * its opaque pointer with the address of that local struct, calls
 * block_job_cancel(), and loops for as long as data.ret is -EINPROGRESS.
 *
 * Returns -ECANCELED when data.cancelled is set and data.ret is 0;
 * otherwise returns data.ret.
 *
 * NOTE(review): the embedded numbering skips 4395-4396 and 4403, so the
 * remaining initialisation of `data` (its users read data.job and data.cb)
 * and the body of the wait loop are not visible in this extract. The
 * comment that starts on line 4392 has also lost its closing delimiter
 * (line 4394), which has to be restored for the statements after it to be
 * compiled. Confirm against the complete file.
 */
4385 int block_job_cancel_sync(BlockJob
*job
)
4387 struct BlockCancelData data
;
4388 BlockDriverState
*bs
= job
->bs
;
4390 assert(bs
->job
== job
);
4392 /* Set up our own callback to store the result and chain to
4393 * the original callback.
4397 data
.opaque
= job
->opaque
;
4398 data
.ret
= -EINPROGRESS
;
4399 job
->cb
= block_job_cancel_cb
;
4400 job
->opaque
= &data
;
4401 block_job_cancel(job
);
4402 while (data
.ret
== -EINPROGRESS
) {
4405 return (data
.cancelled
&& data
.ret
== 0) ? -ECANCELED
: data
.ret
;
4408 void block_job_sleep_ns(BlockJob
*job
, QEMUClock
*clock
, int64_t ns
)
4410 /* Check cancellation *before* setting busy = false, too! */
4411 if (!block_job_is_cancelled(job
)) {
4413 co_sleep_ns(clock
, ns
);