/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor/monitor.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/sysemu.h"
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#endif

struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BdrvRequestFlags flags,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
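/*
 * Examples (illustrative, not from the original file): "d:" matches both
 * checks above; "C:\foo" matches only is_windows_drive_prefix(); device
 * paths such as "\\.\d:" or "//./PhysicalDrive0" match the strstart()
 * forms.
 */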
/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/* this function drains all throttled I/Os */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}
/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}
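/*
 * Illustrative call sequence (a sketch, not part of the original file):
 * enable throttling first, then install the limits, as the comment above
 * requires.
 *
 *     ThrottleConfig cfg = { ... };        // filled in by the caller
 *     if (!bs->io_limits_enabled) {
 *         bdrv_io_limits_enable(bs);
 *     }
 *     bdrv_set_io_limits(bs, &cfg);
 */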
/* This function makes an I/O wait if needed
 *
 * @bytes:    the number of bytes of the I/O
 * @is_write: is the I/O a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this I/O have to wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already throttled,
     * queue the I/O */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the I/O will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}

size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}
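/*
 * Examples (illustrative): path_is_absolute("/img/a.qcow2") is non-zero;
 * on Windows so are "\\.\d:" and "d:\img". A relative name such as
 * "a.qcow2" returns 0 on all hosts.
 */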
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
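/*
 * Example (illustrative sketch): combining a relative backing file name
 * with the directory of the image that refers to it:
 *
 *     char dest[PATH_MAX];
 *     path_combine(dest, sizeof(dest), "/img/overlay.qcow2", "base.qcow2");
 *     // dest now holds "/img/base.qcow2"
 */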
void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        pstrcpy(dest, sz, bs->backing_file);
    } else {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    }
}

void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    if (bdrv->bdrv_create) {
        assert(!bdrv->bdrv_create2 && !bdrv->create_opts);
        assert(!bdrv->bdrv_amend_options2);
    } else if (bdrv->bdrv_create2) {
        assert(!bdrv->bdrv_create && !bdrv->create_options);
        assert(!bdrv->bdrv_amend_options);
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
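/*
 * Minimal registration sketch (hypothetical driver, for illustration
 * only): block drivers normally call bdrv_register() from a block_init()
 * constructor.
 *
 *     static BlockDriver bdrv_mydrv = {
 *         .format_name   = "mydrv",
 *         .instance_size = sizeof(BDRVMyDrvState),
 *         // .bdrv_open, .bdrv_co_readv, ... supplied by the driver
 *     };
 *
 *     static void bdrv_mydrv_init(void)
 *     {
 *         bdrv_register(&bdrv_mydrv);
 *     }
 *     block_init(bdrv_mydrv_init);
 */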
/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name, Error **errp)
{
    BlockDriverState *bs;
    int i;

    if (bdrv_find(device_name)) {
        error_setg(errp, "Device with id '%s' already exists",
                   device_name);
        return NULL;
    }
    if (bdrv_find_node(device_name)) {
        error_setg(errp, "Device with node-name '%s' already exists",
                   device_name);
        return NULL;
    }

    bs = g_malloc0(sizeof(BlockDriverState));
    QLIST_INIT(&bs->dirty_bitmaps);
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    }
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

    return bs;
}

void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}

typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QEMUOptionParameter *options;
    QemuOpts *opts;
    int ret;
    Error *err;
} CreateCo;
static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);
    assert(!(cco->options && cco->opts));

    if (cco->drv->bdrv_create2) {
        QemuOptsList *opts_list = NULL;
        if (cco->options) {
            opts_list = params_to_opts(cco->options);
            cco->opts = qemu_opts_create(opts_list, NULL, 0, &error_abort);
        }
        ret = cco->drv->bdrv_create2(cco->filename, cco->opts, &local_err);
        if (cco->options) {
            qemu_opts_del(cco->opts);
            qemu_opts_free(opts_list);
        }
    } else {
        if (cco->opts) {
            cco->options = opts_to_params(cco->opts);
        }
        ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err);
        if (cco->opts) {
            free_option_parameters(cco->options);
        }
    }
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

int bdrv_create(BlockDriver *drv, const char* filename,
                QEMUOptionParameter *options,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create && !drv->bdrv_create2) {
        error_setg(errp, "Driver '%s' does not support image creation",
                   drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}
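/*
 * Illustrative use (a sketch; the QemuOpts setup is elided): create an
 * image through the driver found by name and report the error on failure.
 *
 *     Error *err = NULL;
 *     BlockDriver *drv = bdrv_find_format("qcow2");
 *     QemuOpts *opts = ...;   // size etc., built from the driver's opts
 *     if (bdrv_create(drv, "/tmp/test.qcow2", NULL, opts, &err) < 0) {
 *         // inspect and free err
 *     }
 */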
int bdrv_create_file(const char* filename, QEMUOptionParameter *options,
                     QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, options, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}

int bdrv_refresh_limits(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return 0;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file);
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd);
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        return drv->bdrv_refresh_limits(bs);
    }

    return 0;
}
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater. */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return bdrv_find_format("file");
    }

    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            error_setg(errp, "Could not find raw image format");
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "format found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}

/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}
/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
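/*
 * Summary of the mapping implemented above (informational):
 *
 *     mode           NOCACHE   CACHE_WB   NO_FLUSH
 *     off/none          x         x
 *     directsync        x
 *     writeback                   x
 *     unsafe                      x          x
 *     writethrough  (no flags set; this is the default)
 */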
/*
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}

/*
 * Returns the flags that a temporary snapshot should get, based on the
 * originally requested flags (the originally requested image will have flags
 * like a backing file)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
}

/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

    return flags;
}

/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);

    return flags;
}

static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* empty string node name is invalid */
    if (node_name[0] == '\0') {
        error_setg(errp, "Empty node name");
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (bdrv_find(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() with directly using a protocol as drv. This layer is already
     * opened, so assign it to bs (while file becomes a closed BlockDriverState)
     * and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs);
    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 *
 * options is an indirect pointer to a QDict of options to pass to the block
 * drivers, or pointer to NULL for an empty set of options. If this function
 * takes ownership of the QDict reference, it will set *options to NULL;
 * otherwise, it will contain unused/unrecognized options after this function
 * returns. Then, the caller is responsible for freeing it. If it intends to
 * reuse the QDict, QINCREF() should be called beforehand.
 */
static int bdrv_file_open(BlockDriverState *bs, const char *filename,
                          QDict **options, int flags, Error **errp)
{
    BlockDriver *drv;
    const char *drvname;
    bool parse_filename = false;
    Error *local_err = NULL;
    int ret;

    /* Fetch the file name from the options QDict if necessary */
    if (!filename) {
        filename = qdict_get_try_str(*options, "filename");
    } else if (filename && !qdict_haskey(*options, "filename")) {
        qdict_put(*options, "filename", qstring_from_str(filename));
        parse_filename = true;
    } else {
        error_setg(errp, "Can't specify 'file' and 'filename' options at the "
                   "same time");
        ret = -EINVAL;
        goto fail;
    }

    /* Find the right block driver */
    drvname = qdict_get_try_str(*options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        if (!drv) {
            error_setg(errp, "Unknown driver '%s'", drvname);
        }
        qdict_del(*options, "driver");
    } else if (filename) {
        drv = bdrv_find_protocol(filename, parse_filename);
        if (!drv) {
            error_setg(errp, "Unknown protocol");
        }
    } else {
        error_setg(errp, "Must specify either driver or file");
        drv = NULL;
    }

    if (!drv) {
        /* errp has been set already */
        ret = -ENOENT;
        goto fail;
    }

    /* Parse the filename and open it */
    if (drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            ret = -EINVAL;
            goto fail;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        } else {
            filename = qdict_get_str(*options, "filename");
        }
    }

    if (!drv->bdrv_file_open) {
        ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err);
        *options = NULL;
    } else {
        ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err);
    }
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto fail;
    }

    bs->growable = 1;
    return 0;

fail:
    return ret;
}
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{
    if (bs->backing_hd) {
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bs->device_name);
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs);
}
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_file_open.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriver *back_drv = NULL;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
    }

    backing_hd = bdrv_new("", errp);

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    assert(bs->backing_hd == NULL);
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                    QDict *options, const char *bdref_key, int flags,
                    bool allow_none, Error **errp)
{
    QDict *image_options;
    int ret;
    char *bdref_key_dot;
    const char *reference;

    assert(pbs);
    assert(*pbs == NULL);

    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        if (allow_none) {
            ret = 0;
        } else {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
            ret = -EINVAL;
        }
        QDECREF(image_options);
        goto done;
    }

    ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);

done:
    qdict_del(options, bdref_key);
    return ret;
}
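/*
 * Example (illustrative sketch): with a flattened options QDict that
 * contains "file.driver=file" and "file.filename=/img/test.qcow2", the
 * protocol layer for bdref_key "file" can be opened like this:
 *
 *     BlockDriverState *file = NULL;
 *     ret = bdrv_open_image(&file, NULL, options, "file",
 *                           BDRV_O_PROTOCOL, false, &local_err);
 */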
void bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
{
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
    int64_t total_size;
    BlockDriver *bdrv_qcow2;
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    QDict *snapshot_options;
    BlockDriverState *bs_snapshot;
    Error *local_err = NULL;
    int ret;

    /* if snapshot, we create a temporary backing file and open it
       instead of opening 'filename' directly */

    /* Get the required size from the image */
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        error_setg_errno(errp, -total_size, "Could not get image size");
        goto out;
    }
    total_size &= BDRV_SECTOR_MASK;

    /* Create the temporary image */
    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not get temporary filename");
        goto out;
    }

    bdrv_qcow2 = bdrv_find_format("qcow2");

    assert(!(bdrv_qcow2->create_options && bdrv_qcow2->create_opts));
    if (bdrv_qcow2->create_options) {
        create_opts = params_to_opts(bdrv_qcow2->create_options);
    } else {
        create_opts = bdrv_qcow2->create_opts;
    }
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
    ret = bdrv_create(bdrv_qcow2, tmp_filename, NULL, opts, &local_err);
    qemu_opts_del(opts);
    if (bdrv_qcow2->create_options) {
        qemu_opts_free(create_opts);
    }
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not create temporary overlay "
                         "'%s': %s", tmp_filename,
                         error_get_pretty(local_err));
        error_free(local_err);
        goto out;
    }

    /* Prepare a new options QDict for the temporary file */
    snapshot_options = qdict_new();
    qdict_put(snapshot_options, "file.driver",
              qstring_from_str("file"));
    qdict_put(snapshot_options, "file.filename",
              qstring_from_str(tmp_filename));

    bs_snapshot = bdrv_new("", &error_abort);

    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
                    flags, bdrv_qcow2, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto out;
    }

    bdrv_append(bs_snapshot, bs);

out:
    g_free(tmp_filename);
}
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}
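/*
 * Example (illustrative): a filename such as
 *
 *     json:{"driver": "qcow2",
 *           "file": {"driver": "file", "filename": "test.qcow2"}}
 *
 * is parsed and flattened into the keys "driver", "file.driver" and
 * "file.filename" before the normal option handling runs.
 */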
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
 */
int bdrv_open(BlockDriverState **pbs, const char *filename,
              const char *reference, QDict *options, int flags,
              BlockDriver *drv, Error **errp)
{
    int ret;
    BlockDriverState *file = NULL, *bs;
    const char *drvname;
    Error *local_err = NULL;
    int snapshot_flags = 0;

    assert(pbs);

    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
        QDECREF(options);

        if (*pbs) {
            error_setg(errp, "Cannot reuse an existing BDS when referencing "
                       "another block device");
            return -EINVAL;
        }

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

    if (*pbs) {
        bs = *pbs;
    } else {
        bs = bdrv_new("", &error_abort);
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            ret = -EINVAL;
            goto fail;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(options, json_options, false);
        QDECREF(json_options);
        filename = NULL;
    }

    bs->options = options;
    options = qdict_clone_shallow(options);

    if (flags & BDRV_O_PROTOCOL) {
        assert(!drv);
        ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL,
                             &local_err);
        if (!ret) {
            drv = bs->drv;
            goto done;
        } else if (bs->drv) {
            goto close_and_fail;
        } else {
            goto fail;
        }
    }

    /* Open image file without format layer */
    if (flags & BDRV_O_RDWR) {
        flags |= BDRV_O_ALLOW_RDWR;
    }
    if (flags & BDRV_O_SNAPSHOT) {
        snapshot_flags = bdrv_temp_snapshot_flags(flags);
        flags = bdrv_backing_flags(flags);
    }

    assert(file == NULL);
    ret = bdrv_open_image(&file, filename, options, "file",
                          bdrv_inherited_flags(flags),
                          true, &local_err);
    if (ret < 0) {
        goto fail;
    }

    /* Find the right image format driver */
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        qdict_del(options, "driver");
        if (!drv) {
            error_setg(errp, "Invalid driver: '%s'", drvname);
            ret = -EINVAL;
            goto fail;
        }
    }

    if (!drv) {
        if (file) {
            ret = find_image_format(file, filename, &drv, &local_err);
        } else {
            error_setg(errp, "Must specify either driver or file");
            ret = -EINVAL;
            goto fail;
        }
    }

    if (!drv) {
        goto fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
    if (ret < 0) {
        goto fail;
    }

    if (file && (bs->file != file)) {
        bdrv_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        QDict *backing_options;

        qdict_extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            goto close_and_fail;
        }
    }

done:
    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
                       "support the option '%s'", drv->format_name,
                       bs->device_name, entry->key);
        }

        ret = -EINVAL;
        goto close_and_fail;
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
               && !runstate_check(RUN_STATE_INMIGRATE)
               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
        error_setg(errp,
                   "Guest must be stopped for opening of encrypted image");
        ret = -EBUSY;
        goto close_and_fail;
    }

    QDECREF(options);
    *pbs = bs;
    return 0;

fail:
    if (file != NULL) {
        bdrv_unref(file);
    }
    QDECREF(bs->options);
    QDECREF(options);
    bs->options = NULL;
    if (!*pbs) {
        /* If *pbs is NULL, a new BDS has been created in this function and
           needs to be freed now. Otherwise, it does not need to be closed,
           since it has not really been opened yet. */
        bdrv_unref(bs);
    }
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;

close_and_fail:
    /* See fail path, but now the BDS has to be always closed */
    if (*pbs) {
        bdrv_close(bs);
    } else {
        bdrv_unref(bs);
    }
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
typedef struct BlockReopenQueueEntry {
     bool prepared;
     BDRVReopenState state;
     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;

/*
 * Adds a BlockDriverState to a simple queue for an atomic, transactional
 * reopen of multiple devices.
 *
 * bs_queue can either be an existing BlockReopenQueue that has had
 * QSIMPLEQ_INIT already performed, or alternatively may be NULL, in which
 * case a new BlockReopenQueue will be created and initialized. This newly
 * created BlockReopenQueue should be passed back in for subsequent calls
 * that are intended to be of the same atomic set.
 *
 * bs is the BlockDriverState to add to the reopen queue.
 *
 * flags contains the open flags for the associated bs
 *
 * returns a pointer to bs_queue, which is either the newly allocated
 * bs_queue, or the existing bs_queue being used.
 *
 */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
                                    BlockDriverState *bs, int flags)
{
    assert(bs != NULL);

    BlockReopenQueueEntry *bs_entry;
    if (bs_queue == NULL) {
        bs_queue = g_new0(BlockReopenQueue, 1);
        QSIMPLEQ_INIT(bs_queue);
    }

    /* bdrv_open() masks this flag out */
    flags &= ~BDRV_O_PROTOCOL;

    if (bs->file) {
        bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
    }

    bs_entry = g_new0(BlockReopenQueueEntry, 1);
    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);

    bs_entry->state.bs = bs;
    bs_entry->state.flags = flags;

    return bs_queue;
}
/*
 * Reopen multiple BlockDriverStates atomically & transactionally.
 *
 * The queue passed in (bs_queue) must have been built up previously
 * via bdrv_reopen_queue().
 *
 * Reopens all BDS specified in the queue, with the appropriate
 * flags. All devices are prepared for reopen, and failure of any
 * device will cause all device changes to be abandoned, and intermediate
 * data cleaned up.
 *
 * If all devices prepare successfully, then the changes are committed
 * to all devices.
 *
 */
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
    Error *local_err = NULL;

    assert(bs_queue != NULL);

    bdrv_drain_all();

    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
            error_propagate(errp, local_err);
            goto cleanup;
        }
        bs_entry->prepared = true;
    }

    /* If we reach this point, we have success and just need to apply the
     * changes
     */
    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        bdrv_reopen_commit(&bs_entry->state);
    }

    ret = 0;

cleanup:
    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
        if (ret && bs_entry->prepared) {
            bdrv_reopen_abort(&bs_entry->state);
        }
        g_free(bs_entry);
    }
    g_free(bs_queue);
    return ret;
}
/* Reopen a single BlockDriverState with the specified flags. */
int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);

    ret = bdrv_reopen_multiple(queue, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
    }
    return ret;
}
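/*
 * Sketch of a multi-device reopen (illustrative): queue several BDSes,
 * then prepare/commit or abort them as a single transaction.
 *
 *     BlockReopenQueue *queue = NULL;
 *     queue = bdrv_reopen_queue(queue, bs_a, flags_a);
 *     queue = bdrv_reopen_queue(queue, bs_b, flags_b);
 *     ret = bdrv_reopen_multiple(queue, &local_err);  // frees the queue
 */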
/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * bs is the BlockDriverState to reopen
 * flags are the new open flags
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error. On error errp will be set
 * as well.
 *
 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 *
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockDriver *drv;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
        reopen_state->flags & BDRV_O_RDWR) {
        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
                  reopen_state->bs->device_name);
        goto error;
    }

    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
                  strerror(-ret));
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
                  drv->format_name, reopen_state->bs->device_name,
                  "reopening of file");
        ret = -1;
        goto error;
    }

    ret = 0;

error:
    return ret;
}
/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
 */
void bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    /* set BDS specific flags now */
    reopen_state->bs->open_flags         = reopen_state->flags;
    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
                                              BDRV_O_CACHE_WB);
    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);

    bdrv_refresh_limits(reopen_state->bs);
}

/*
 * Abort the reopen, and delete and free the staged changes in
 * reopen_state
 */
void bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    if (drv->bdrv_reopen_abort) {
        drv->bdrv_reopen_abort(reopen_state);
    }
}
void bdrv_close(BlockDriverState *bs)
{
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        if (bs->backing_hd) {
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;

        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    bdrv_dev_change_media_cb(bs, false);

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_close(bs);
        aio_context_release(aio_context);
    }
}

/* Check if any requests are in-flight (including throttled requests) */
static bool bdrv_requests_pending(BlockDriverState *bs)
{
    if (!QLIST_EMPTY(&bs->tracked_requests)) {
        return true;
    }
    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
        return true;
    }
    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
        return true;
    }
    if (bs->file && bdrv_requests_pending(bs->file)) {
        return true;
    }
    if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
        return true;
    }
    return false;
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete. Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    /* Always run first iteration so any pending completion BHs run */
    bool busy = true;
    BlockDriverState *bs;

    while (busy) {
        busy = false;

        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
            AioContext *aio_context = bdrv_get_aio_context(bs);
            bool bs_busy;

            aio_context_acquire(aio_context);
            bdrv_start_throttled_reqs(bs);
            bs_busy = bdrv_requests_pending(bs);
            bs_busy |= aio_poll(aio_context, bs_busy);
            aio_context_release(aio_context);

            busy |= bs_busy;
        }
    }
}
/* make a BlockDriverState anonymous by removing from the bdrv_states and
 * graph_bdrv_states lists.
 * Also, NUL-terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, device_list);
    }
    bs->device_name[0] = '\0';
    if (bs->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
    }
    bs->node_name[0] = '\0';
}

static void bdrv_rebind(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_rebind) {
        bs->drv->bdrv_rebind(bs);
    }
}
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->dev_ops            = bs_src->dev_ops;
    bs_dest->dev_opaque         = bs_src->dev_opaque;
    bs_dest->dev                = bs_src->dev;
    bs_dest->guest_block_size   = bs_src->guest_block_size;
    bs_dest->copy_on_read       = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error      = bs_src->on_read_error;
    bs_dest->on_write_error     = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
    bs_dest->iostatus           = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt             = bs_src->refcnt;

    /* job */
    bs_dest->job                = bs_src->job;

    /* keep the same entry in bdrv_states */
    pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name),
            bs_src->device_name);
    bs_dest->device_list = bs_src->device_list;
    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
/*
 * Swap bs contents for two image chains while they are live,
 * while keeping required fields on the BlockDriverState that is
 * actually attached to a device.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_old. Both bs_new and bs_old are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
{
    BlockDriverState tmp;

    /* The code needs to swap the node_name but simply swapping node_list won't
     * work so first remove the nodes from the graph list, do the swap then
     * insert them back if needed.
     */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
    }

    /* bs_new must be anonymous and shouldn't have anything fancy enabled */
    assert(bs_new->device_name[0] == '\0');
    assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
    assert(bs_new->job == NULL);
    assert(bs_new->dev == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    tmp = *bs_new;
    *bs_new = *bs_old;
    *bs_old = tmp;

    /* there are some fields that should not be swapped, move them back */
    bdrv_move_feature_fields(&tmp, bs_old);
    bdrv_move_feature_fields(bs_old, bs_new);
    bdrv_move_feature_fields(bs_new, &tmp);

    /* bs_new shouldn't be in bdrv_states even after the swap! */
    assert(bs_new->device_name[0] == '\0');

    /* Check a few fields that should remain attached to the device */
    assert(bs_new->dev == NULL);
    assert(bs_new->job == NULL);
    assert(bs_new->io_limits_enabled == false);
    assert(!throttle_have_timer(&bs_new->throttle_state));

    /* insert the nodes back into the graph node list if needed */
    if (bs_new->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
    }
    if (bs_old->node_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
    }

    bdrv_rebind(bs_new);
    bdrv_rebind(bs_old);
}

/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    bdrv_swap(bs_new, bs_top);

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    bdrv_set_backing_hd(bs_top, bs_new);
}
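/*
 * Resulting chain (illustrative): if the chain was "base <- top" and
 * bs_new is an anonymous empty overlay, then after
 * bdrv_append(bs_new, top) the device still points at 'top', whose
 * contents are now the overlay and whose backing file is the old top
 * image: "base <- old-top <- top".
 */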
static void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(bdrv_op_blocker_is_empty(bs));
    assert(!bs->refcnt);
    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    bdrv_close(bs);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    g_free(bs);
}

int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->guest_block_size = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
}
void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
                               enum MonitorEvent ev,
                               BlockErrorAction action, bool is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(ev, data);

    qobject_decref(data);
}

static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
{
    QObject *data;

    data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
                              bdrv_get_device_name(bs), ejected);
    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);

    qobject_decref(data);
}

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
        if (tray_was_closed) {
            /* tray open */
            bdrv_emit_qmp_eject_event(bs, true);
        }
        if (load) {
            /* tray close */
            bdrv_emit_qmp_eject_event(bs, false);
        }
    }
}
bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}

static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}

bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}

/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res, fix);
}
#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    int64_t sector, total_sectors, length, backing_length;
    int n, ro, open_flags;
    int ret = 0;
    uint8_t *buf = NULL;
    char filename[PATH_MAX];

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
        bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
        return -EBUSY;
    }

    ro = bs->backing_hd->read_only;
    /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
    pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
            return -EACCES;
        }
    }

    length = bdrv_getlength(bs);
    if (length < 0) {
        ret = length;
        goto ro_cleanup;
    }

    backing_length = bdrv_getlength(bs->backing_hd);
    if (backing_length < 0) {
        ret = backing_length;
        goto ro_cleanup;
    }

    /* If our top snapshot is larger than the backing file image,
     * grow the backing file image if possible. If not possible,
     * we must return an error */
    if (length > backing_length) {
        ret = bdrv_truncate(bs->backing_hd, length);
        if (ret < 0) {
            goto ro_cleanup;
        }
    }

    total_sectors = length >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
        if (ret < 0) {
            goto ro_cleanup;
        }
        if (ret) {
            ret = bdrv_read(bs, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }

            ret = bdrv_write(bs->backing_hd, sector, buf, n);
            if (ret < 0) {
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        if (ret < 0) {
            goto ro_cleanup;
        }
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd) {
        bdrv_flush(bs->backing_hd);
    }

    ret = 0;
ro_cleanup:
    g_free(buf);

    if (ro) {
        /* ignoring error return here */
        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
    }

    return ret;
}

int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        if (bs->drv && bs->backing_hd) {
            int ret = bdrv_commit(bs);
            if (ret < 0) {
                aio_context_release(aio_context);
                return ret;
            }
        }
        aio_context_release(aio_context);
    }
    return 0;
}
/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    if (req->serialising) {
        req->bs->serialising_in_flight--;
    }

    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t offset,
                                  unsigned int bytes, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .offset         = offset,
        .bytes          = bytes,
        .is_write       = is_write,
        .co             = qemu_coroutine_self(),
        .serialising    = false,
        .overlap_offset = offset,
        .overlap_bytes  = bytes,
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}

static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
{
    int64_t overlap_offset = req->offset & ~(align - 1);
    unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
                               - overlap_offset;

    if (!req->serialising) {
        req->bs->serialising_in_flight++;
        req->serialising = true;
    }

    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
}
/**
 * Round a region to cluster boundaries
 */
void bdrv_round_to_clusters(BlockDriverState *bs,
                            int64_t sector_num, int nb_sectors,
                            int64_t *cluster_sector_num,
                            int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
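/* Worked example (editorial sketch): with a 64 KiB cluster size,
 * c = 65536 / 512 = 128 sectors. A request for sectors [130, 135) is widened
 * to the containing cluster: cluster_sector_num = QEMU_ALIGN_DOWN(130, 128)
 * = 128 and cluster_nb_sectors = QEMU_ALIGN_UP(130 - 128 + 5, 128) = 128,
 * i.e. the whole cluster [128, 256). */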
static int bdrv_get_cluster_size(BlockDriverState *bs)
{
    BlockDriverInfo bdi;
    int ret;

    ret = bdrv_get_info(bs, &bdi);
    if (ret < 0 || bdi.cluster_size == 0) {
        return bs->request_alignment;
    } else {
        return bdi.cluster_size;
    }
}
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t offset, unsigned int bytes)
{
    if (offset >= req->overlap_offset + req->overlap_bytes) {
        return false;
    }
    if (req->overlap_offset >= offset + bytes) {
        return false;
    }
    return true;
}
static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
{
    BlockDriverState *bs = self->bs;
    BdrvTrackedRequest *req;
    bool retry;
    bool waited = false;

    if (!bs->serialising_in_flight) {
        return false;
    }

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (req == self || (!req->serialising && !self->serialising)) {
                continue;
            }
            if (tracked_request_overlaps(req, self->overlap_offset,
                                         self->overlap_bytes))
            {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                /* If the request is already (indirectly) waiting for us, or
                 * will wait for us as soon as it wakes up, then just go on
                 * (instead of producing a deadlock in the former case). */
                if (!req->waiting_for) {
                    self->waiting_for = req;
                    qemu_co_queue_wait(&req->wait_queue);
                    self->waiting_for = NULL;
                    retry = true;
                    waited = true;
                    break;
                }
            }
        }
    } while (retry);

    return waited;
}
/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file, yet
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;
    int ret;

    /* Backing file format doesn't make sense without a backing file */
    if (backing_fmt && !backing_file) {
        return -EINVAL;
    }

    if (drv->bdrv_change_backing_file != NULL) {
        ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        ret = -ENOTSUP;
    }

    if (ret == 0) {
        pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
        pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
    }
    return ret;
}
/*
 * Finds the image layer in the chain that has 'bs' as its backing file.
 *
 * active is the current topmost image.
 *
 * Returns NULL if bs is not found in active's image chain,
 * or if active == bs.
 */
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
                                    BlockDriverState *bs)
{
    BlockDriverState *overlay = NULL;
    BlockDriverState *intermediate;

    assert(active != NULL);
    assert(bs != NULL);

    /* if bs is the same as active, then by definition it has no overlay
     */
    if (active == bs) {
        return NULL;
    }

    intermediate = active;
    while (intermediate->backing_hd) {
        if (intermediate->backing_hd == bs) {
            overlay = intermediate;
            break;
        }
        intermediate = intermediate->backing_hd;
    }

    return overlay;
}
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;
/*
 * Drops images above 'base' up to and including 'top', and sets the image
 * above 'top' to have base as its backing file.
 *
 * Requires that the overlay to 'top' is opened r/w, so that the backing file
 * information in 'bs' can be properly updated.
 *
 * E.g., this will convert the following chain:
 * bottom <- base <- intermediate <- top <- active
 *
 * to
 *
 * bottom <- base <- active
 *
 * It is allowed for bottom==base, in which case it converts:
 *
 * base <- intermediate <- top <- active
 *
 * to
 *
 * base <- active
 *
 * Error conditions:
 *  if active == top, that is considered an error
 *
 */
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
                           BlockDriverState *base)
{
    BlockDriverState *intermediate;
    BlockDriverState *base_bs = NULL;
    BlockDriverState *new_top_bs = NULL;
    BlkIntermediateStates *intermediate_state, *next;
    int ret = -EIO;

    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
    QSIMPLEQ_INIT(&states_to_delete);

    if (!top->drv || !base->drv) {
        goto exit;
    }

    new_top_bs = bdrv_find_overlay(active, top);

    if (new_top_bs == NULL) {
        /* we could not find the image above 'top', this is an error */
        goto exit;
    }

    /* special case of new_top_bs->backing_hd already pointing to base - nothing
     * to do, no intermediate images */
    if (new_top_bs->backing_hd == base) {
        ret = 0;
        goto exit;
    }

    intermediate = top;

    /* now we will go down through the list, and add each BDS we find
     * into our deletion queue, until we hit the 'base'
     */
    while (intermediate) {
        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
        intermediate_state->bs = intermediate;
        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);

        if (intermediate->backing_hd == base) {
            base_bs = intermediate->backing_hd;
            break;
        }
        intermediate = intermediate->backing_hd;
    }
    if (base_bs == NULL) {
        /* something went wrong, we did not end at the base. safely
         * unravel everything, and exit with error */
        goto exit;
    }

    /* success - we can delete the intermediate states, and link top->base */
    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
                                   base_bs->drv ? base_bs->drv->format_name : "");
    if (ret) {
        goto exit;
    }
    bdrv_set_backing_hd(new_top_bs, base_bs);

    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        /* so that bdrv_close() does not recursively close the chain */
        bdrv_set_backing_hd(intermediate_state->bs, NULL);
        bdrv_unref(intermediate_state->bs);
    }
    ret = 0;

exit:
    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
        g_free(intermediate_state);
    }
    return ret;
}
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   int size)
{
    int64_t len;

    if (size > INT_MAX) {
        return -EIO;
    }

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
        return -EIO;
    }

    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t offset;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
    BdrvRequestFlags flags;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
                                      rwco->qiov->size, rwco->qiov,
                                      rwco->flags);
    } else {
        rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
                                       rwco->qiov->size, rwco->qiov,
                                       rwco->flags);
    }
}
/*
 * Process a vectored synchronous request using coroutines
 */
static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
                        QEMUIOVector *qiov, bool is_write,
                        BdrvRequestFlags flags)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .offset = offset,
        .qiov = qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
        .flags = flags,
    };

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }
    return rwco.ret;
}
/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };

    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
                        &qiov, is_write, flags);
}
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}

/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
                          uint8_t *buf, int nb_sectors)
{
    bool enabled;
    int ret;

    enabled = bs->io_limits_enabled;
    bs->io_limits_enabled = false;
    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
    bs->io_limits_enabled = enabled;
    return ret;
}
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}

int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
                      int nb_sectors, BdrvRequestFlags flags)
{
    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
                      BDRV_REQ_ZERO_WRITE | flags);
}
/*
 * Completely zero out a block device with the help of bdrv_write_zeroes.
 * The operation is sped up by checking the block status and only writing
 * zeroes to the device if they currently do not return zeroes. Optional
 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
 *
 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
 */
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
    int64_t target_size;
    int64_t ret, nb_sectors, sector_num = 0;
    int n;

    target_size = bdrv_getlength(bs);
    if (target_size < 0) {
        return target_size;
    }
    target_size /= BDRV_SECTOR_SIZE;

    for (;;) {
        nb_sectors = target_size - sector_num;
        if (nb_sectors <= 0) {
            return 0;
        }
        if (nb_sectors > INT_MAX) {
            nb_sectors = INT_MAX;
        }
        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
        if (ret < 0) {
            error_report("error getting block status at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            sector_num += n;
            continue;
        }
        ret = bdrv_write_zeroes(bs, sector_num, n, flags);
        if (ret < 0) {
            error_report("error writing zeroes at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        sector_num += n;
    }
}
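/* Example (editorial sketch, kept out of the build with #if 0): zeroing an
 * entire device while allowing the driver to unmap ranges where it can. */
#if 0
static int example_zero_device(BlockDriverState *bs)
{
    return bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);
}
#endif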
int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = bytes,
    };
    int ret;

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
    if (ret < 0) {
        return ret;
    }

    return bytes;
}

int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
{
    int ret;

    ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
    if (ret < 0) {
        return ret;
    }

    return qiov->size;
}

int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int bytes)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base   = (void *) buf,
        .iov_len    = bytes,
    };

    if (bytes < 0) {
        return -EINVAL;
    }

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_pwritev(bs, offset, &qiov);
}
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that already do it */
    if (bs->enable_write_cache) {
        bdrv_flush(bs);
    }

    return 0;
}
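/* Example (editorial sketch, kept out of the build with #if 0): updating a
 * 512-byte metadata header at offset 0 so that no later write can be
 * reordered before it. 'header' is a hypothetical caller-provided buffer. */
#if 0
static int example_update_header(BlockDriverState *bs, const void *header)
{
    return bdrv_pwrite_sync(bs, 0, header, 512);
}
#endif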
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file. This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
                           &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors, 0);
    } else {
        /* This does not change the data on the disk, it is not necessary
         * to flush even in cache=writethrough mode.
         */
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests. If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
                        nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
/*
 * Forwards an already correctly aligned request to the BlockDriver. This
 * handles copy on read and zeroing after EOF; any other features must be
 * implemented by the caller.
 */
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    int64_t align, QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);

    /* Handle Copy on Read and associated serialisation */
    if (flags & BDRV_REQ_COPY_ON_READ) {
        /* If we touch the same cluster it counts as an overlap. This
         * guarantees that allocating writes will be serialized and not race
         * with each other for the same cluster. For example, in copy-on-read
         * it ensures that the CoR read and write operations are atomic and
         * guest writes cannot interleave between them. */
        mark_request_serialising(req, bdrv_get_cluster_size(bs));
    }

    wait_serialising_requests(req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    /* Forward the request to the BlockDriver */
    if (!(bs->zero_beyond_eof && bs->growable)) {
        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
    } else {
        /* Read zeros after EOF of growable BDSes */
        int64_t len, total_sectors, max_nb_sectors;

        len = bdrv_getlength(bs);
        if (len < 0) {
            ret = len;
            goto out;
        }

        total_sectors = DIV_ROUND_UP(len, BDRV_SECTOR_SIZE);
        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
                                  align >> BDRV_SECTOR_BITS);
        if (max_nb_sectors > 0) {
            ret = drv->bdrv_co_readv(bs, sector_num,
                                     MIN(nb_sectors, max_nb_sectors), qiov);
        } else {
            ret = 0;
        }

        /* Reading beyond end of file is supposed to produce zeroes */
        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
            uint64_t offset = MAX(0, total_sectors - sector_num);
            uint64_t bytes = (sector_num + nb_sectors - offset) *
                              BDRV_SECTOR_SIZE;
            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
        }
    }

out:
    return ret;
}
/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;

    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, false);
    }

    /* Align read if necessary by padding qiov */
    if (offset & (align - 1)) {
        head_buf = qemu_blockalign(bs, align);
        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }
        tail_buf = qemu_blockalign(bs, align);
        qemu_iovec_add(&local_qiov, tail_buf,
                       align - ((offset + bytes) & (align - 1)));

        bytes = ROUND_UP(bytes, align);
    }

    tracked_request_begin(&req, bs, offset, bytes, false);
    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
                              use_local_qiov ? &local_qiov : qiov,
                              flags);
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
        qemu_vfree(head_buf);
        qemu_vfree(tail_buf);
    }

    return ret;
}
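/* Worked example (editorial sketch): with align = 4096, a read of bytes
 * [1000, 4000) gains a 1000-byte head pad and becomes offset 0, bytes 4000;
 * the tail then gets a 96-byte pad and bytes is rounded up to 4096, so the
 * driver sees one aligned request [0, 4096) while the extra head/tail data
 * lands in the padding buffers rather than the caller's qiov. */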
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
        return -EINVAL;
    }

    return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
                             nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}

int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}

int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}
/* if no limit is specified in the BlockLimits use a default
 * of 32768 512-byte sectors (16 MiB) per request.
 */
#define MAX_WRITE_ZEROES_DEFAULT 32768

static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov = {0};
    int ret = 0;

    int max_write_zeroes = bs->bl.max_write_zeroes ?
                           bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;

    while (nb_sectors > 0 && !ret) {
        int num = nb_sectors;

        /* Align request. Block drivers can expect the "bulk" of the request
         * to be aligned.
         */
        if (bs->bl.write_zeroes_alignment
            && num > bs->bl.write_zeroes_alignment) {
            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
                /* Make a small request up to the first aligned sector. */
                num = bs->bl.write_zeroes_alignment;
                num -= sector_num % bs->bl.write_zeroes_alignment;
            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
                /* Shorten the request to the last aligned sector. num cannot
                 * underflow because num > bs->bl.write_zeroes_alignment.
                 */
                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
            }
        }

        /* limit request size */
        if (num > max_write_zeroes) {
            num = max_write_zeroes;
        }

        ret = -ENOTSUP;
        /* First try the efficient write zeroes operation */
        if (drv->bdrv_co_write_zeroes) {
            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
        }

        if (ret == -ENOTSUP) {
            /* Fall back to bounce buffer if write zeroes is unsupported */
            iov.iov_len = num * BDRV_SECTOR_SIZE;
            if (iov.iov_base == NULL) {
                iov.iov_base = qemu_blockalign(bs, num * BDRV_SECTOR_SIZE);
                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
            }
            qemu_iovec_init_external(&qiov, &iov, 1);

            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);

            /* Keep the bounce buffer around if it is big enough for all
             * future requests.
             */
            if (num < max_write_zeroes) {
                qemu_vfree(iov.iov_base);
                iov.iov_base = NULL;
            }
        }

        sector_num += num;
        nb_sectors -= num;
    }

    qemu_vfree(iov.iov_base);
    return ret;
}
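/* Worked example (editorial sketch): with write_zeroes_alignment = 128
 * sectors, a request for sectors [100, 400) is issued as [100, 128) (short
 * head up to the first aligned sector), [128, 384) (aligned bulk) and
 * [384, 400) (tail), so the driver sees at most one unaligned fragment on
 * each end. */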
/*
 * Forwards an already correctly aligned write request to the BlockDriver.
 */
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
    QEMUIOVector *qiov, int flags)
{
    BlockDriver *drv = bs->drv;
    bool waited;
    int ret;

    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;

    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);

    waited = wait_serialising_requests(req);
    assert(!waited || !req->serialising);
    assert(req->overlap_offset <= offset);
    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);

    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);

    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
        qemu_iovec_is_zero(qiov)) {
        flags |= BDRV_REQ_ZERO_WRITE;
        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
    }

    if (ret < 0) {
        /* Do nothing, write notifier decided to fail this request */
    } else if (flags & BDRV_REQ_ZERO_WRITE) {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
    } else {
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }
    BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);

    if (ret == 0 && !bs->enable_write_cache) {
        ret = bdrv_co_flush(bs);
    }

    bdrv_set_dirty(bs, sector_num, nb_sectors);

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }
    if (bs->growable && ret >= 0) {
        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
    }

    return ret;
}
/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BdrvTrackedRequest req;
    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
    uint8_t *head_buf = NULL;
    uint8_t *tail_buf = NULL;
    QEMUIOVector local_qiov;
    bool use_local_qiov = false;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_byte_request(bs, offset, bytes)) {
        return -EIO;
    }

    /* throttling disk I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, bytes, true);
    }

    /*
     * Align write if necessary by performing a read-modify-write cycle.
     * Pad qiov with the read parts and be sure to have a tracked request not
     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
     */
    tracked_request_begin(&req, bs, offset, bytes, true);

    if (offset & (align - 1)) {
        QEMUIOVector head_qiov;
        struct iovec head_iov;

        mark_request_serialising(&req, align);
        wait_serialising_requests(&req);

        head_buf = qemu_blockalign(bs, align);
        head_iov = (struct iovec) {
            .iov_base   = head_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&head_qiov, &head_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
                                  align, &head_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);

        qemu_iovec_init(&local_qiov, qiov->niov + 2);
        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
        use_local_qiov = true;

        bytes += offset & (align - 1);
        offset = offset & ~(align - 1);
    }

    if ((offset + bytes) & (align - 1)) {
        QEMUIOVector tail_qiov;
        struct iovec tail_iov;
        size_t tail_bytes;
        bool waited;

        mark_request_serialising(&req, align);
        waited = wait_serialising_requests(&req);
        assert(!waited || !use_local_qiov);

        tail_buf = qemu_blockalign(bs, align);
        tail_iov = (struct iovec) {
            .iov_base   = tail_buf,
            .iov_len    = align,
        };
        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);

        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
                                  align, &tail_qiov, 0);
        if (ret < 0) {
            goto fail;
        }
        BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);

        if (!use_local_qiov) {
            qemu_iovec_init(&local_qiov, qiov->niov + 1);
            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
            use_local_qiov = true;
        }

        tail_bytes = (offset + bytes) & (align - 1);
        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);

        bytes = ROUND_UP(bytes, align);
    }

    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
                               use_local_qiov ? &local_qiov : qiov,
                               flags);

fail:
    tracked_request_end(&req);

    if (use_local_qiov) {
        qemu_iovec_destroy(&local_qiov);
    }
    qemu_vfree(head_buf);
    qemu_vfree(tail_buf);

    return ret;
}
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
        return -EINVAL;
    }

    return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
                              nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}

int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
}
int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors,
                                      BdrvRequestFlags flags)
{
    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);

    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        flags &= ~BDRV_REQ_MAY_UNMAP;
    }

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                             BDRV_REQ_ZERO_WRITE | flags);
}
/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
        return -EBUSY;
    }
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dev_resize_cb(bs);
    }
    return ret;
}
/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}
/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    if (drv->has_variable_length) {
        int ret = refresh_total_sectors(bs, bs->total_sectors);
        if (ret < 0) {
            return ret;
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}
/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t length;
    length = bdrv_getlength(bs);
    if (length < 0)
        length = 0;
    else
        length = length >> BDRV_SECTOR_BITS;
    *nb_sectors_ptr = length;
}
void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
                       BlockdevOnError on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}

BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}
BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
{
    BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
        return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_STOP:
        return BDRV_ACTION_STOP;
    case BLOCKDEV_ON_ERROR_REPORT:
        return BDRV_ACTION_REPORT;
    case BLOCKDEV_ON_ERROR_IGNORE:
        return BDRV_ACTION_IGNORE;
    default:
        abort();
    }
}
/* This is done by device models because, while the block layer knows
 * about the error, it does not know whether an operation comes from
 * the device or the block layer (from a job, for example).
 */
void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
                       bool is_read, int error)
{
    assert(error >= 0);
    bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
    if (action == BDRV_ACTION_STOP) {
        vm_stop(RUN_STATE_IO_ERROR);
        bdrv_iostatus_set_err(bs, error);
    }
}
int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}

void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
{
    bs->enable_write_cache = wce;

    /* so a reopen() will preserve wce */
    if (wce) {
        bs->open_flags |= BDRV_O_CACHE_WB;
    } else {
        bs->open_flags &= ~BDRV_O_CACHE_WB;
    }
}
int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}

int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
const char *bdrv_get_format_name(BlockDriverState *bs)
{
    return bs->drv ? bs->drv->format_name : NULL;
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;
    int count = 0;
    const char **formats = NULL;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        if (drv->format_name) {
            bool found = false;
            int i = count;
            while (formats && i && !found) {
                found = !strcmp(formats[--i], drv->format_name);
            }

            if (!found) {
                formats = g_realloc(formats, (count + 1) * sizeof(char *));
                formats[count++] = drv->format_name;
                it(opaque, drv->format_name);
            }
        }
    }
    g_free(formats);
}
/* This function is to find block backend bs */
BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

/* This function is to find a node in the bs graph */
BlockDriverState *bdrv_find_node(const char *node_name)
{
    BlockDriverState *bs;

    assert(node_name);

    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
        if (!strcmp(node_name, bs->node_name)) {
            return bs;
        }
    }
    return NULL;
}
/* Put this QMP function here so it can access the static graph_bdrv_states. */
BlockDeviceInfoList *bdrv_named_nodes_list(void)
{
    BlockDeviceInfoList *list, *entry;
    BlockDriverState *bs;

    list = NULL;
    QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = bdrv_block_device_info(bs);
        entry->next = list;
        list = entry;
    }

    return list;
}
BlockDriverState *bdrv_lookup_bs(const char *device,
                                 const char *node_name,
                                 Error **errp)
{
    BlockDriverState *bs = NULL;

    if (device) {
        bs = bdrv_find(device);
        if (bs) {
            return bs;
        }
    }

    if (node_name) {
        bs = bdrv_find_node(node_name);
        if (bs) {
            return bs;
        }
    }

    error_setg(errp, "Cannot find device=%s nor node_name=%s",
                     device ? device : "",
                     node_name ? node_name : "");
    return NULL;
}
BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, device_list);
}

void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        it(opaque, bs);
    }
}
const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}

int bdrv_get_flags(BlockDriverState *bs)
{
    return bs->open_flags;
}
int bdrv_flush_all(void)
{
    BlockDriverState *bs;
    int result = 0;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);
        int ret;

        aio_context_acquire(aio_context);
        ret = bdrv_flush(bs);
        if (ret < 0 && !result) {
            result = ret;
        }
        aio_context_release(aio_context);
    }

    return result;
}
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
    return 1;
}

int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    /* If BS is a copy on write image, it is initialized to
       the contents of the base image, which may not be zeroes.  */
    if (bs->backing_hd) {
        return 0;
    }
    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    /* safe default */
    return 0;
}

bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
{
    BlockDriverInfo bdi;

    if (bs->backing_hd) {
        return false;
    }

    if (bdrv_get_info(bs, &bdi) == 0) {
        return bdi.unallocated_blocks_are_zero;
    }

    return false;
}

bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
{
    BlockDriverInfo bdi;

    if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
        return false;
    }

    if (bdrv_get_info(bs, &bdi) == 0) {
        return bdi.can_write_zeroes_with_unmap;
    }

    return false;
}
typedef struct BdrvCoGetBlockStatusData {
    BlockDriverState *bs;
    BlockDriverState *base;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;
    int64_t ret;
    bool done;
} BdrvCoGetBlockStatusData;

/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
                                                     int64_t sector_num,
                                                     int nb_sectors, int *pnum)
{
    int64_t length;
    int64_t n;
    int64_t ret, ret2;

    length = bdrv_getlength(bs);
    if (length < 0) {
        return length;
    }

    if (sector_num >= (length >> BDRV_SECTOR_BITS)) {
        *pnum = 0;
        return 0;
    }

    n = bs->total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    if (!bs->drv->bdrv_co_get_block_status) {
        *pnum = nb_sectors;
        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
        if (bs->drv->protocol_name) {
            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
        }
        return ret;
    }

    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        *pnum = 0;
        return ret;
    }

    if (ret & BDRV_BLOCK_RAW) {
        assert(ret & BDRV_BLOCK_OFFSET_VALID);
        return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                     *pnum, pnum);
    }

    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
        ret |= BDRV_BLOCK_ALLOCATED;
    }

    if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
        if (bdrv_unallocated_blocks_are_zero(bs)) {
            ret |= BDRV_BLOCK_ZERO;
        } else if (bs->backing_hd) {
            BlockDriverState *bs2 = bs->backing_hd;
            int64_t length2 = bdrv_getlength(bs2);
            if (length2 >= 0 && sector_num >= (length2 >> BDRV_SECTOR_BITS)) {
                ret |= BDRV_BLOCK_ZERO;
            }
        }
    }

    if (bs->file &&
        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
        (ret & BDRV_BLOCK_OFFSET_VALID)) {
        ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
                                        *pnum, pnum);
        if (ret2 >= 0) {
            /* Ignore errors. This is just providing extra information, it
             * is useful but not necessary.
             */
            ret |= (ret2 & BDRV_BLOCK_ZERO);
        }
    }

    return ret;
}
/* Coroutine wrapper for bdrv_get_block_status() */
static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
{
    BdrvCoGetBlockStatusData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
                                         data->pnum);
    data->done = true;
}

/*
 * Synchronous wrapper around bdrv_co_get_block_status().
 *
 * See bdrv_co_get_block_status() for details.
 */
int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors, int *pnum)
{
    Coroutine *co;
    BdrvCoGetBlockStatusData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_get_block_status_co_entry(&data);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
        qemu_coroutine_enter(co, &data);
        while (!data.done) {
            aio_poll(aio_context, true);
        }
    }
    return data.ret;
}
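/* Example (editorial sketch, kept out of the build with #if 0): walking a
 * device and counting sectors that already read as zeroes, much as
 * bdrv_make_zero() does before deciding what to write. */
#if 0
static int64_t example_count_zero_sectors(BlockDriverState *bs)
{
    int64_t total = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
    int64_t sector_num = 0, zeroes = 0;
    int n;

    while (sector_num < total) {
        int64_t ret = bdrv_get_block_status(bs, sector_num,
                                            MIN(total - sector_num, INT_MAX),
                                            &n);
        if (ret < 0) {
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            zeroes += n;
        }
        sector_num += n;
    }
    return zeroes;
}
#endif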
int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                   int nb_sectors, int *pnum)
{
    int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
    if (ret < 0) {
        return ret;
    }
    return (ret & BDRV_BLOCK_ALLOCATED);
}
/*
 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
 *
 * Return true if the given sector is allocated in any image between
 * BASE and TOP (inclusive). BASE can be NULL to check if the given
 * sector is allocated in any image of the chain. Return false otherwise.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 */
int bdrv_is_allocated_above(BlockDriverState *top,
                            BlockDriverState *base,
                            int64_t sector_num,
                            int nb_sectors, int *pnum)
{
    BlockDriverState *intermediate;
    int ret, n = nb_sectors;

    intermediate = top;
    while (intermediate && intermediate != base) {
        int pnum_inter;
        ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
                                &pnum_inter);
        if (ret < 0) {
            return ret;
        } else if (ret) {
            *pnum = pnum_inter;
            return 1;
        }

        /*
         * [sector_num, nb_sectors] is unallocated on top but intermediate
         * might have
         *
         * [sector_num+x, nr_sectors] allocated.
         */
        if (n > pnum_inter &&
            (intermediate == top ||
             sector_num + pnum_inter < intermediate->total_sectors)) {
            n = pnum_inter;
        }

        intermediate = intermediate->backing_hd;
    }

    *pnum = n;
    return 0;
}
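/* Example (editorial sketch, kept out of the build with #if 0): checking
 * whether sector 0 is allocated anywhere between the top image and its base
 * (passing NULL as base scans the whole chain). */
#if 0
static int example_allocated_in_chain(BlockDriverState *top)
{
    int pnum;
    return bdrv_is_allocated_above(top, NULL, 0, 1, &pnum);
}
#endif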
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return bs->backing_file;
    else if (bs->encrypted)
        return bs->filename;
    else
        return NULL;
}

void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_write_compressed)
        return -ENOTSUP;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    assert(QLIST_EMPTY(&bs->dirty_bitmaps));

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_get_info)
        return -ENOTSUP;
    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}

ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (drv && drv->bdrv_get_specific_info) {
        return drv->bdrv_get_specific_info(bs);
    }
    return NULL;
}
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base   = (void *) buf,
        .iov_len    = size,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);
    return bdrv_writev_vmstate(bs, &qiov, pos);
}

int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
    BlockDriver *drv = bs->drv;

    if (!drv) {
        return -ENOMEDIUM;
    } else if (drv->bdrv_save_vmstate) {
        return drv->bdrv_save_vmstate(bs, qiov, pos);
    } else if (bs->file) {
        return bdrv_writev_vmstate(bs->file, qiov, pos);
    }

    return -ENOTSUP;
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_load_vmstate)
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
        return;
    }

    bs->drv->bdrv_debug_event(bs, event);
}

int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
                          const char *tag)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
        bs = bs->file;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
        return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
    }

    return -ENOTSUP;
}

int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
        bs = bs->file;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
        return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
    }

    return -ENOTSUP;
}

int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
    while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
        bs = bs->file;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
        return bs->drv->bdrv_debug_resume(bs, tag);
    }

    return -ENOTSUP;
}

bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
    while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
        bs = bs->file;
    }

    if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
        return bs->drv->bdrv_debug_is_suspended(bs, tag);
    }

    return false;
}

int bdrv_is_snapshot(BlockDriverState *bs)
{
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
}
/* backing_file can either be relative, or absolute, or a protocol. If it is
 * relative, it must be relative to the chain. So, passing in bs->filename
 * from a BDS as backing_file should not be done, as that may be relative to
 * the CWD rather than the chain. */
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
        const char *backing_file)
{
    char *filename_full = NULL;
    char *backing_file_full = NULL;
    char *filename_tmp = NULL;
    int is_protocol = 0;
    BlockDriverState *curr_bs = NULL;
    BlockDriverState *retval = NULL;

    if (!bs || !bs->drv || !backing_file) {
        return NULL;
    }

    filename_full     = g_malloc(PATH_MAX);
    backing_file_full = g_malloc(PATH_MAX);
    filename_tmp      = g_malloc(PATH_MAX);

    is_protocol = path_has_protocol(backing_file);

    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {

        /* If either of the filename paths is actually a protocol, then
         * compare unmodified paths; otherwise make paths relative */
        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        } else {
            /* If not an absolute filename path, make it relative to the current
             * image's filename path */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         backing_file);

            /* We are going to compare absolute pathnames */
            if (!realpath(filename_tmp, filename_full)) {
                continue;
            }

            /* We need to make sure the backing filename we are comparing against
             * is relative to the current image filename (or absolute) */
            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
                         curr_bs->backing_file);

            if (!realpath(filename_tmp, backing_file_full)) {
                continue;
            }

            if (strcmp(backing_file_full, filename_full) == 0) {
                retval = curr_bs->backing_hd;
                break;
            }
        }
    }

    g_free(filename_full);
    g_free(backing_file_full);
    g_free(filename_tmp);
    return retval;
}
int bdrv_get_backing_file_depth(BlockDriverState *bs)
{
    if (!bs->drv) {
        return 0;
    }

    if (!bs->backing_hd) {
        return 0;
    }

    return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
}

BlockDriverState *bdrv_find_base(BlockDriverState *bs)
{
    BlockDriverState *curr_bs = NULL;

    if (!bs) {
        return NULL;
    }

    curr_bs = bs;

    while (curr_bs->backing_hd) {
        curr_bs = curr_bs->backing_hd;
    }
    return curr_bs;
}
/**************************************************************/
/* async I/Os */

BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
                                 cb, opaque, false);
}

BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
                                 cb, opaque, true);
}

BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
                                 BDRV_REQ_ZERO_WRITE | flags,
                                 cb, opaque, true);
}
typedef struct MultiwriteCB {
    int error;
    int num_requests;
    int num_callbacks;
    struct {
        BlockDriverCompletionFunc *cb;
        void *opaque;
        QEMUIOVector *free_qiov;
    } callbacks[];
} MultiwriteCB;

static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int i;

    for (i = 0; i < mcb->num_callbacks; i++) {
        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
        if (mcb->callbacks[i].free_qiov) {
            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
        }
        g_free(mcb->callbacks[i].free_qiov);
    }
}

static void multiwrite_cb(void *opaque, int ret)
{
    MultiwriteCB *mcb = opaque;

    trace_multiwrite_cb(mcb, ret);

    if (ret < 0 && !mcb->error) {
        mcb->error = ret;
    }

    mcb->num_requests--;
    if (mcb->num_requests == 0) {
        multiwrite_user_cb(mcb);
        g_free(mcb);
    }
}
static int multiwrite_req_compare(const void *a, const void *b)
{
    const BlockRequest *req1 = a, *req2 = b;

    /*
     * Note that we can't simply subtract req2->sector from req1->sector
     * here as that could overflow the return value.
     */
    if (req1->sector > req2->sector) {
        return 1;
    } else if (req1->sector < req2->sector) {
        return -1;
    } else {
        return 0;
    }
}
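/* Worked example (editorial sketch): with req1->sector = 0 and
 * req2->sector = INT64_MAX, the difference is -(2^63 - 1); truncated to the
 * int that qsort expects, its low 32 bits are 1, so it would wrongly compare
 * as positive. The explicit three-way comparison avoids that. */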
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);

            // We shouldn't need to add any zeros between the two requests
            assert(reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector     = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov       = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
                              reqs[i].nb_sectors, reqs[i].flags,
                              multiwrite_cb, mcb,
                              true);
    }

    return 0;
}

void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->aiocb_info->cancel(acb);
}
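/* Example (editorial sketch, kept out of the build with #if 0): batching two
 * write requests with bdrv_aio_multiwrite(). 'qiov0'/'qiov1' are hypothetical
 * caller-initialized vectors and 'done_cb' a hypothetical completion
 * callback; the sector numbers are arbitrary. */
#if 0
static int example_multiwrite(BlockDriverState *bs,
                              QEMUIOVector *qiov0, QEMUIOVector *qiov1,
                              BlockDriverCompletionFunc *done_cb, void *opaque)
{
    BlockRequest reqs[2] = {
        { .sector = 0,  .nb_sectors = qiov0->size >> 9, .qiov = qiov0,
          .cb = done_cb, .opaque = opaque },
        { .sector = 64, .nb_sectors = qiov1->size >> 9, .qiov = qiov1,
          .cb = done_cb, .opaque = opaque },
    };

    return bdrv_aio_multiwrite(bs, reqs, 2);
}
#endif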
/**************************************************************/
/* async block device emulation */

typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;
    int is_write;
} BlockDriverAIOCBSync;

static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

static const AIOCBInfo bdrv_em_aiocb_info = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};

static void bdrv_aio_bh_cb(void *opaque)
{
    BlockDriverAIOCBSync *acb = opaque;

    if (!acb->is_write)
        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)

{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}

static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;
    bool is_write;
    bool *done;
    QEMUBH *bh;
} BlockDriverAIOCBCoroutine;

static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
    BlockDriverAIOCBCoroutine *acb =
        container_of(blockacb, BlockDriverAIOCBCoroutine, common);
    bool done = false;

    acb->done = &done;
    while (!done) {
        aio_poll(aio_context, true);
    }
}

static const AIOCBInfo bdrv_em_co_aiocb_info = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};

static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);

    if (acb->done) {
        *acb->done = true;
    }

    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}

/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
    }

    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BdrvRequestFlags flags,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->req.flags = flags;
    acb->is_write = is_write;
    acb->done = NULL;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_flush(bs, opaque);

    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->done = NULL;

    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
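/* Example (editorial sketch, kept out of the build with #if 0): issuing an
 * asynchronous flush and logging the result from the completion callback.
 * The helper names are hypothetical. */
#if 0
static void example_flush_done(void *opaque, int ret)
{
    if (ret < 0) {
        fprintf(stderr, "flush failed: %s\n", strerror(-ret));
    }
}

static void example_aio_flush(BlockDriverState *bs)
{
    bdrv_aio_flush(bs, example_flush_done, NULL);
}
#endif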
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->done = NULL;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
4848 void bdrv_init(void)
4850 module_call_init(MODULE_INIT_BLOCK
);
4853 void bdrv_init_with_whitelist(void)
4855 use_bdrv_whitelist
= 1;
4859 void *qemu_aio_get(const AIOCBInfo
*aiocb_info
, BlockDriverState
*bs
,
4860 BlockDriverCompletionFunc
*cb
, void *opaque
)
4862 BlockDriverAIOCB
*acb
;
4864 acb
= g_slice_alloc(aiocb_info
->aiocb_size
);
4865 acb
->aiocb_info
= aiocb_info
;
4868 acb
->opaque
= opaque
;
4872 void qemu_aio_release(void *p
)
4874 BlockDriverAIOCB
*acb
= p
;
4875 g_slice_free1(acb
->aiocb_info
->aiocb_size
, acb
);
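
/* Usage sketch (illustrative): a driver embeds BlockDriverAIOCB as the first
 * member of its own ACB struct and advertises the total size in AIOCBInfo so
 * that qemu_aio_get()/qemu_aio_release() can allocate and free it without
 * knowing the concrete type.  The names below are hypothetical. */
#if 0
typedef struct ExampleAIOCB {
    BlockDriverAIOCB common;    /* must be the first member */
    int example_state;          /* driver-private data follows */
} ExampleAIOCB;

static const AIOCBInfo example_aiocb_info = {
    .aiocb_size = sizeof(ExampleAIOCB),
};

/* alloc:   ExampleAIOCB *acb = qemu_aio_get(&example_aiocb_info, bs, cb, opaque);
 * release: qemu_aio_release(acb); */
#endif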

/**************************************************************/
/* Coroutine block device emulation */

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}

static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();

    return co.ret;
}

static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}

static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}

int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        goto flush_parent;
    }

    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and therefore don't support bdrv_flush. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
flush_parent:
    return bdrv_co_flush(bs->file);
}
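
/* Recap of the ordering above: bdrv_co_flush() first pushes cached data to
 * the host OS (bdrv_co_flush_to_os), then, unless BDRV_O_NO_FLUSH
 * (cache=unsafe) short-circuits it, forces the data to stable storage via
 * bdrv_co_flush_to_disk or the bdrv_aio_flush fallback, and finally recurses
 * into bs->file so the protocol layer flushes as well. */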

void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
    Error *local_err = NULL;
    int ret;

    if (!bs->drv) {
        return;
    }

    if (bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs, &local_err);
    } else if (bs->file) {
        bdrv_invalidate_cache(bs->file, &local_err);
    }
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        return;
    }
}

void bdrv_invalidate_cache_all(Error **errp)
{
    BlockDriverState *bs;
    Error *local_err = NULL;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_invalidate_cache(bs, &local_err);
        aio_context_release(aio_context);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }
}

void bdrv_clear_incoming_migration_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
        aio_context_release(aio_context);
    }
}

int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }

    return rwco.ret;
}
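
/* The NOT_DONE/aio_poll() loop above is the generic pattern for driving a
 * coroutine_fn to completion from synchronous code.  A minimal sketch,
 * assuming an RwCo-style context struct like the one used here:
 *
 *     RwCo rwco = { .bs = bs, .ret = NOT_DONE };
 *     Coroutine *co = qemu_coroutine_create(some_co_entry);
 *     qemu_coroutine_enter(co, &rwco);
 *     while (rwco.ret == NOT_DONE) {
 *         aio_poll(bdrv_get_aio_context(bs), true);
 *     }
 */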

typedef struct DiscardCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int ret;
} DiscardCo;

static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    DiscardCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}

/* if no limit is specified in the BlockLimits use a default
 * of 32768 512-byte sectors (16 MiB) per request.
 */
#define MAX_DISCARD_DEFAULT 32768

int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    int max_discard;

    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    }

    bdrv_reset_dirty(bs, sector_num, nb_sectors);

    /* Do nothing if disabled. */
    if (!(bs->open_flags & BDRV_O_UNMAP)) {
        return 0;
    }

    if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
        return 0;
    }

    max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
    while (nb_sectors > 0) {
        int ret;
        int num = nb_sectors;

        /* align request */
        if (bs->bl.discard_alignment &&
            num >= bs->bl.discard_alignment &&
            sector_num % bs->bl.discard_alignment) {
            if (num > bs->bl.discard_alignment) {
                num = bs->bl.discard_alignment;
            }
            num -= sector_num % bs->bl.discard_alignment;
        }

        /* limit request size */
        if (num > max_discard) {
            num = max_discard;
        }

        if (bs->drv->bdrv_co_discard) {
            ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
        } else {
            BlockDriverAIOCB *acb;
            CoroutineIOCompletion co = {
                .coroutine = qemu_coroutine_self(),
            };

            acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                            bdrv_co_io_em_complete, &co);
            if (acb == NULL) {
                return -EIO;
            } else {
                qemu_coroutine_yield();
                ret = co.ret;
            }
        }
        if (ret && ret != -ENOTSUP) {
            return ret;
        }

        sector_num += num;
        nb_sectors -= num;
    }
    return 0;
}
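
/* Worked example of the alignment clipping above (illustrative numbers):
 * with bl.discard_alignment == 128, sector_num == 100 and nb_sectors == 4096,
 * the first iteration clips num to 128 - (100 % 128) == 28 sectors so that
 * the request ends exactly on an alignment boundary; later iterations start
 * at sectors 128, 256, ... and are limited only by max_discard. */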

int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    DiscardCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            aio_poll(aio_context, true);
        }
    }

    return rwco.ret;
}

/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 */
int bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv)
        return 0;
    if (!drv->bdrv_is_inserted)
        return 1;
    return drv->bdrv_is_inserted(bs);
}

/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know.  Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}

/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }

    if (bs->device_name[0] != '\0') {
        bdrv_emit_qmp_eject_event(bs, eject_flag);
    }
}

/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}

/* needed for generic scsi interface */

int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_ioctl)
        return drv->bdrv_ioctl(bs, req, buf);
    return -ENOTSUP;
}

BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_aio_ioctl)
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
    return NULL;
}

void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
{
    bs->guest_block_size = align;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}

/*
 * Check if all memory in this vector is sector aligned.
 */
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
    int i;
    size_t alignment = bdrv_opt_mem_align(bs);

    for (i = 0; i < qiov->niov; i++) {
        if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
            return false;
        }
        if (qiov->iov[i].iov_len % alignment) {
            return false;
        }
    }

    return true;
}
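
/* Usage sketch (illustrative): allocating a buffer that passes
 * bdrv_qiov_is_aligned(), e.g. for O_DIRECT-style access.  The function name
 * is hypothetical; 'len' must itself be a multiple of the alignment. */
#if 0
static void example_aligned_buffer(BlockDriverState *bs, size_t len)
{
    QEMUIOVector qiov;
    void *buf = qemu_blockalign(bs, len);   /* aligned to bdrv_opt_mem_align() */

    qemu_iovec_init(&qiov, 1);
    qemu_iovec_add(&qiov, buf, len);
    assert(bdrv_qiov_is_aligned(bs, &qiov));

    qemu_iovec_destroy(&qiov);
    qemu_vfree(buf);
}
#endif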

BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
                                          Error **errp)
{
    int64_t bitmap_size;
    BdrvDirtyBitmap *bitmap;

    assert((granularity & (granularity - 1)) == 0);

    granularity >>= BDRV_SECTOR_BITS;
    assert(granularity);
    bitmap_size = bdrv_getlength(bs);
    if (bitmap_size < 0) {
        error_setg_errno(errp, -bitmap_size, "could not get length of device");
        errno = -bitmap_size;
        return NULL;
    }
    bitmap_size >>= BDRV_SECTOR_BITS;
    bitmap = g_malloc0(sizeof(BdrvDirtyBitmap));
    bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
    return bitmap;
}
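
/* Granularity math example (illustrative): a caller passing granularity ==
 * 65536 bytes gets 65536 >> BDRV_SECTOR_BITS == 128 sectors per bit, and
 * hbitmap_alloc() receives ffs(128) - 1 == 7, i.e. each bit of the hbitmap
 * covers 2^7 sectors == 64 KiB of the device. */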

void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    BdrvDirtyBitmap *bm, *next;
    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
        if (bm == bitmap) {
            QLIST_REMOVE(bitmap, list);
            hbitmap_free(bitmap->bitmap);
            g_free(bitmap);
            return;
        }
    }
}

BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
{
    BdrvDirtyBitmap *bm;
    BlockDirtyInfoList *list = NULL;
    BlockDirtyInfoList **plist = &list;

    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
        BlockDirtyInfo *info = g_malloc0(sizeof(BlockDirtyInfo));
        BlockDirtyInfoList *entry = g_malloc0(sizeof(BlockDirtyInfoList));
        info->count = bdrv_get_dirty_count(bs, bm);
        info->granularity =
            ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
        entry->value = info;
        *plist = entry;
        plist = &entry->next;
    }

    return list;
}

int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
{
    if (bitmap) {
        return hbitmap_get(bitmap->bitmap, sector);
    } else {
        return 0;
    }
}

void bdrv_dirty_iter_init(BlockDriverState *bs,
                          BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
{
    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
}

void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
                    int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
    }
}

void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
{
    BdrvDirtyBitmap *bitmap;
    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
        hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
    }
}

int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    return hbitmap_count(bitmap->bitmap);
}
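
/* Usage sketch (illustrative): walking all dirty chunks of one bitmap with
 * the iterator API above.  The function name is hypothetical. */
#if 0
static void example_walk_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
{
    HBitmapIter hbi;
    int64_t sector;

    bdrv_dirty_iter_init(bs, bitmap, &hbi);
    while ((sector = hbitmap_iter_next(&hbi)) >= 0) {
        /* 'sector' is the first sector of a dirty granularity chunk */
    }
}
#endif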

/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
    bs->refcnt++;
}

/* Release a previously grabbed reference to bs.
 * If after releasing, reference count is zero, the BlockDriverState is
 * deleted. */
void bdrv_unref(BlockDriverState *bs)
{
    assert(bs->refcnt > 0);
    if (--bs->refcnt == 0) {
        bdrv_delete(bs);
    }
}

struct BdrvOpBlocker {
    Error *reason;
    QLIST_ENTRY(BdrvOpBlocker) list;
};

bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    if (!QLIST_EMPTY(&bs->op_blockers[op])) {
        blocker = QLIST_FIRST(&bs->op_blockers[op]);
        if (errp) {
            error_setg(errp, "Device '%s' is busy: %s",
                       bs->device_name, error_get_pretty(blocker->reason));
        }
        return true;
    }
    return false;
}

void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);

    blocker = g_malloc0(sizeof(BdrvOpBlocker));
    blocker->reason = reason;
    QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
}

void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
{
    BdrvOpBlocker *blocker, *next;
    assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
    QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
        if (blocker->reason == reason) {
            QLIST_REMOVE(blocker, list);
            g_free(blocker);
        }
    }
}

void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_block(bs, i, reason);
    }
}

void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
{
    int i;
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        bdrv_op_unblock(bs, i, reason);
    }
}

bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
{
    int i;

    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        if (!QLIST_EMPTY(&bs->op_blockers[i])) {
            return false;
        }
    }
    return true;
}
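
/* Usage sketch (illustrative): a block job typically blocks all operations
 * on its device for its lifetime.  The Error object identifies the blocker,
 * so only a matching bdrv_op_unblock_all() removes it. */
#if 0
Error *blocker = NULL;

error_setg(&blocker, "block device is in use by an example job");
bdrv_op_block_all(bs, blocker);
/* ... the job runs ... */
bdrv_op_unblock_all(bs, blocker);
error_free(blocker);
#endif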

void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}

void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}

void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
        if (bs->job) {
            block_job_iostatus_reset(bs->job);
        }
    }
}

void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    assert(bdrv_iostatus_is_enabled(bs));
    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}

void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}

void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
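
/* Usage sketch (illustrative): device models bracket each request with an
 * accounting cookie, e.g. for a read of 'bytes' bytes: */
#if 0
BlockAcctCookie cookie;

bdrv_acct_start(bs, &cookie, bytes, BDRV_ACCT_READ);
/* ... submit the request and wait for completion ... */
bdrv_acct_done(bs, &cookie);
#endif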

void bdrv_img_create(const char *filename, const char *fmt,
                     const char *base_filename, const char *base_fmt,
                     char *options, uint64_t img_size, int flags,
                     Error **errp, bool quiet)
{
    QemuOptsList *create_opts = NULL;
    QemuOpts *opts = NULL;
    const char *backing_fmt, *backing_file;
    int64_t size;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    Error *local_err = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_setg(errp, "Unknown file format '%s'", fmt);
        return;
    }

    proto_drv = bdrv_find_protocol(filename, true);
    if (!proto_drv) {
        error_setg(errp, "Unknown protocol '%s'", filename);
        return;
    }

    create_opts = qemu_opts_append(create_opts, drv->create_opts,
                                   drv->create_options);
    create_opts = qemu_opts_append(create_opts, proto_drv->create_opts,
                                   proto_drv->create_options);

    /* Create parameter list with default values */
    opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        if (qemu_opts_do_parse(opts, options, NULL) != 0) {
            error_setg(errp, "Invalid options for file format '%s'", fmt);
            goto out;
        }
    }

    if (base_filename) {
        if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
            error_setg(errp, "Backing file not supported for file format '%s'",
                       fmt);
            goto out;
        }
    }

    if (base_fmt) {
        if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_setg(errp, "Backing file format not supported for file "
                             "format '%s'", fmt);
            goto out;
        }
    }

    backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
    if (backing_file) {
        if (!strcmp(filename, backing_file)) {
            error_setg(errp, "Error: Trying to create an image with the "
                             "same filename as the backing file");
            goto out;
        }
    }

    backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt) {
        backing_drv = bdrv_find_format(backing_fmt);
        if (!backing_drv) {
            error_setg(errp, "Unknown backing file format '%s'",
                       backing_fmt);
            goto out;
        }
    }

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
    if (size == -1) {
        if (backing_file) {
            BlockDriverState *bs;
            uint64_t size;
            char buf[32];
            int back_flags;

            /* backing files always opened read-only */
            back_flags =
                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

            bs = NULL;
            ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
                            backing_drv, &local_err);
            if (ret < 0) {
                error_setg_errno(errp, -ret, "Could not open '%s': %s",
                                 backing_file,
                                 error_get_pretty(local_err));
                error_free(local_err);
                local_err = NULL;
                goto out;
            }
            bdrv_get_geometry(bs, &size);
            size *= 512;

            snprintf(buf, sizeof(buf), "%" PRId64, size);
            qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);

            bdrv_unref(bs);
        } else {
            error_setg(errp, "Image creation needs a size parameter");
            goto out;
        }
    }

    if (!quiet) {
        printf("Formatting '%s', fmt=%s ", filename, fmt);
        qemu_opts_print(opts);
        puts("");
    }

    ret = bdrv_create(drv, filename, NULL, opts, &local_err);

    if (ret == -EFBIG) {
        /* This is generally a better message than whatever the driver would
         * deliver (especially because of the cluster_size_hint), since that
         * is most probably not much different from "image too large". */
        const char *cluster_size_hint = "";
        if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
            cluster_size_hint = " (try using a larger cluster size)";
        }
        error_setg(errp, "The image size is too large for file format '%s'"
                   "%s", fmt, cluster_size_hint);
        error_free(local_err);
        local_err = NULL;
    }

out:
    qemu_opts_del(opts);
    qemu_opts_free(create_opts);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}
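
/* Usage sketch (illustrative): this is roughly what 'qemu-img create -f qcow2
 * test.qcow2 1G' boils down to; filename, format and size are made up. */
#if 0
Error *err = NULL;

bdrv_img_create("test.qcow2", "qcow2", NULL, NULL, NULL,
                1073741824ULL, 0, &err, false);
if (err) {
    error_report("%s", error_get_pretty(err));
    error_free(err);
}
#endif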

AioContext *bdrv_get_aio_context(BlockDriverState *bs)
{
    return bs->aio_context;
}

void bdrv_detach_aio_context(BlockDriverState *bs)
{
    if (!bs->drv) {
        return;
    }

    if (bs->io_limits_enabled) {
        throttle_detach_aio_context(&bs->throttle_state);
    }
    if (bs->drv->bdrv_detach_aio_context) {
        bs->drv->bdrv_detach_aio_context(bs);
    }
    if (bs->file) {
        bdrv_detach_aio_context(bs->file);
    }
    if (bs->backing_hd) {
        bdrv_detach_aio_context(bs->backing_hd);
    }

    bs->aio_context = NULL;
}

void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context)
{
    if (!bs->drv) {
        return;
    }

    bs->aio_context = new_context;

    if (bs->backing_hd) {
        bdrv_attach_aio_context(bs->backing_hd, new_context);
    }
    if (bs->file) {
        bdrv_attach_aio_context(bs->file, new_context);
    }
    if (bs->drv->bdrv_attach_aio_context) {
        bs->drv->bdrv_attach_aio_context(bs, new_context);
    }
    if (bs->io_limits_enabled) {
        throttle_attach_aio_context(&bs->throttle_state, new_context);
    }
}

void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
    bdrv_drain_all(); /* ensure there are no in-flight requests */

    bdrv_detach_aio_context(bs);

    /* This function executes in the old AioContext so acquire the new one in
     * case it runs in a different thread.
     */
    aio_context_acquire(new_context);
    bdrv_attach_aio_context(bs, new_context);
    aio_context_release(new_context);
}
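
/* Usage sketch (illustrative): dataplane-style code hands a device's bs over
 * to an IOThread's AioContext; 'iothread' is assumed to be an IOThread
 * object obtained elsewhere. */
#if 0
AioContext *ctx = iothread_get_aio_context(iothread);

bdrv_set_aio_context(bs, ctx);
#endif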

void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier)
{
    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
}

int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options,
                       QemuOpts *opts)
{
    int ret;

    assert(!(options && opts));

    if (!bs->drv->bdrv_amend_options && !bs->drv->bdrv_amend_options2) {
        return -ENOTSUP;
    }
    if (bs->drv->bdrv_amend_options2) {
        QemuOptsList *opts_list = NULL;
        if (options) {
            opts_list = params_to_opts(options);
            opts = qemu_opts_create(opts_list, NULL, 0, &error_abort);
        }
        ret = bs->drv->bdrv_amend_options2(bs, opts);
        if (options) {
            qemu_opts_del(opts);
            qemu_opts_free(opts_list);
        }
    } else {
        if (opts) {
            options = opts_to_params(opts);
        }
        ret = bs->drv->bdrv_amend_options(bs, options);
        if (opts) {
            free_option_parameters(options);
        }
    }
    return ret;
}

/* This function will be called by the bdrv_recurse_is_first_non_filter method
 * of block filter and by bdrv_is_first_non_filter.
 * It is used to test if the given bs is the candidate or to recurse further
 * in the node graph.
 */
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
                                      BlockDriverState *candidate)
{
    /* return false if basic checks fail */
    if (!bs || !bs->drv) {
        return false;
    }

    /* the code reached a non block filter driver -> check if the bs is
     * the same as the candidate. It's the recursion termination condition.
     */
    if (!bs->drv->is_filter) {
        return bs == candidate;
    }
    /* Down this path the driver is a block filter driver */

    /* If the block filter recursion method is defined use it to recurse down
     * the node graph.
     */
    if (bs->drv->bdrv_recurse_is_first_non_filter) {
        return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
    }

    /* the driver is a block filter but doesn't allow recursion -> return
     * false
     */
    return false;
}

/* This function checks if the candidate is the first non filter bs down its
 * bs chain. Since we don't have pointers to parents it explores all bs chains
 * from the top. Some filters can choose not to pass down the recursion.
 */
bool bdrv_is_first_non_filter(BlockDriverState *candidate)
{
    BlockDriverState *bs;

    /* walk down the bs forest recursively */
    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        bool perm;

        /* try to recurse in this top level bs */
        perm = bdrv_recurse_is_first_non_filter(bs, candidate);

        /* candidate is the first non filter */
        if (perm) {
            return true;
        }
    }

    return false;
}