2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/module.h"
30 #include "qapi/qmp/qjson.h"
31 #include "sysemu/block-backend.h"
32 #include "sysemu/sysemu.h"
33 #include "qemu/notify.h"
34 #include "block/coroutine.h"
35 #include "block/qapi.h"
36 #include "qmp-commands.h"
37 #include "qemu/timer.h"
38 #include "qapi-event.h"
41 #include <sys/types.h>
43 #include <sys/ioctl.h>
44 #include <sys/queue.h>
54 struct BdrvDirtyBitmap
{
56 QLIST_ENTRY(BdrvDirtyBitmap
) list
;
59 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
61 static BlockAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
62 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
63 BlockCompletionFunc
*cb
, void *opaque
);
64 static BlockAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
65 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
66 BlockCompletionFunc
*cb
, void *opaque
);
67 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
68 int64_t sector_num
, int nb_sectors
,
70 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
71 int64_t sector_num
, int nb_sectors
,
73 static int coroutine_fn
bdrv_co_do_preadv(BlockDriverState
*bs
,
74 int64_t offset
, unsigned int bytes
, QEMUIOVector
*qiov
,
75 BdrvRequestFlags flags
);
76 static int coroutine_fn
bdrv_co_do_pwritev(BlockDriverState
*bs
,
77 int64_t offset
, unsigned int bytes
, QEMUIOVector
*qiov
,
78 BdrvRequestFlags flags
);
79 static BlockAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
83 BdrvRequestFlags flags
,
84 BlockCompletionFunc
*cb
,
87 static void coroutine_fn
bdrv_co_do_rw(void *opaque
);
88 static int coroutine_fn
bdrv_co_do_write_zeroes(BlockDriverState
*bs
,
89 int64_t sector_num
, int nb_sectors
, BdrvRequestFlags flags
);
91 static QTAILQ_HEAD(, BlockDriverState
) bdrv_states
=
92 QTAILQ_HEAD_INITIALIZER(bdrv_states
);
94 static QTAILQ_HEAD(, BlockDriverState
) graph_bdrv_states
=
95 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states
);
97 static QLIST_HEAD(, BlockDriver
) bdrv_drivers
=
98 QLIST_HEAD_INITIALIZER(bdrv_drivers
);
100 /* If non-zero, use only whitelisted block drivers */
101 static int use_bdrv_whitelist
;
/*
 * Return non-zero if @filename starts with a Windows drive-letter prefix,
 * i.e. a single ASCII letter followed by ':' (e.g. "c:" or "Z:\path").
 */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}
111 int is_windows_drive(const char *filename
)
113 if (is_windows_drive_prefix(filename
) &&
116 if (strstart(filename
, "\\\\.\\", NULL
) ||
117 strstart(filename
, "//./", NULL
))
123 /* throttling disk I/O limits */
124 void bdrv_set_io_limits(BlockDriverState
*bs
,
129 throttle_config(&bs
->throttle_state
, cfg
);
131 for (i
= 0; i
< 2; i
++) {
132 qemu_co_enter_next(&bs
->throttled_reqs
[i
]);
136 /* this function drain all the throttled IOs */
137 static bool bdrv_start_throttled_reqs(BlockDriverState
*bs
)
139 bool drained
= false;
140 bool enabled
= bs
->io_limits_enabled
;
143 bs
->io_limits_enabled
= false;
145 for (i
= 0; i
< 2; i
++) {
146 while (qemu_co_enter_next(&bs
->throttled_reqs
[i
])) {
151 bs
->io_limits_enabled
= enabled
;
156 void bdrv_io_limits_disable(BlockDriverState
*bs
)
158 bs
->io_limits_enabled
= false;
160 bdrv_start_throttled_reqs(bs
);
162 throttle_destroy(&bs
->throttle_state
);
165 static void bdrv_throttle_read_timer_cb(void *opaque
)
167 BlockDriverState
*bs
= opaque
;
168 qemu_co_enter_next(&bs
->throttled_reqs
[0]);
171 static void bdrv_throttle_write_timer_cb(void *opaque
)
173 BlockDriverState
*bs
= opaque
;
174 qemu_co_enter_next(&bs
->throttled_reqs
[1]);
177 /* should be called before bdrv_set_io_limits if a limit is set */
178 void bdrv_io_limits_enable(BlockDriverState
*bs
)
180 assert(!bs
->io_limits_enabled
);
181 throttle_init(&bs
->throttle_state
,
182 bdrv_get_aio_context(bs
),
184 bdrv_throttle_read_timer_cb
,
185 bdrv_throttle_write_timer_cb
,
187 bs
->io_limits_enabled
= true;
190 /* This function makes an IO wait if needed
192 * @nb_sectors: the number of sectors of the IO
193 * @is_write: is the IO a write
195 static void bdrv_io_limits_intercept(BlockDriverState
*bs
,
199 /* does this io must wait */
200 bool must_wait
= throttle_schedule_timer(&bs
->throttle_state
, is_write
);
202 /* if must wait or any request of this type throttled queue the IO */
204 !qemu_co_queue_empty(&bs
->throttled_reqs
[is_write
])) {
205 qemu_co_queue_wait(&bs
->throttled_reqs
[is_write
]);
208 /* the IO will be executed, do the accounting */
209 throttle_account(&bs
->throttle_state
, is_write
, bytes
);
212 /* if the next request must wait -> do nothing */
213 if (throttle_schedule_timer(&bs
->throttle_state
, is_write
)) {
217 /* else queue next request for execution */
218 qemu_co_queue_next(&bs
->throttled_reqs
[is_write
]);
221 size_t bdrv_opt_mem_align(BlockDriverState
*bs
)
223 if (!bs
|| !bs
->drv
) {
224 /* 4k should be on the safe side */
228 return bs
->bl
.opt_mem_alignment
;
231 /* check if the path starts with "<protocol>:" */
232 static int path_has_protocol(const char *path
)
237 if (is_windows_drive(path
) ||
238 is_windows_drive_prefix(path
)) {
241 p
= path
+ strcspn(path
, ":/\\");
243 p
= path
+ strcspn(path
, ":/");
249 int path_is_absolute(const char *path
)
252 /* specific case for names like: "\\.\d:" */
253 if (is_windows_drive(path
) || is_windows_drive_prefix(path
)) {
256 return (*path
== '/' || *path
== '\\');
258 return (*path
== '/');
262 /* if filename is absolute, just copy it to dest. Otherwise, build a
263 path to it by considering it is relative to base_path. URL are
265 void path_combine(char *dest
, int dest_size
,
266 const char *base_path
,
267 const char *filename
)
274 if (path_is_absolute(filename
)) {
275 pstrcpy(dest
, dest_size
, filename
);
277 p
= strchr(base_path
, ':');
282 p1
= strrchr(base_path
, '/');
286 p2
= strrchr(base_path
, '\\');
298 if (len
> dest_size
- 1)
300 memcpy(dest
, base_path
, len
);
302 pstrcat(dest
, dest_size
, filename
);
306 void bdrv_get_full_backing_filename(BlockDriverState
*bs
, char *dest
, size_t sz
)
308 if (bs
->backing_file
[0] == '\0' || path_has_protocol(bs
->backing_file
)) {
309 pstrcpy(dest
, sz
, bs
->backing_file
);
311 path_combine(dest
, sz
, bs
->filename
, bs
->backing_file
);
315 void bdrv_register(BlockDriver
*bdrv
)
317 /* Block drivers without coroutine functions need emulation */
318 if (!bdrv
->bdrv_co_readv
) {
319 bdrv
->bdrv_co_readv
= bdrv_co_readv_em
;
320 bdrv
->bdrv_co_writev
= bdrv_co_writev_em
;
322 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
323 * the block driver lacks aio we need to emulate that too.
325 if (!bdrv
->bdrv_aio_readv
) {
326 /* add AIO emulation layer */
327 bdrv
->bdrv_aio_readv
= bdrv_aio_readv_em
;
328 bdrv
->bdrv_aio_writev
= bdrv_aio_writev_em
;
332 QLIST_INSERT_HEAD(&bdrv_drivers
, bdrv
, list
);
335 BlockDriverState
*bdrv_new_root(void)
337 BlockDriverState
*bs
= bdrv_new();
339 QTAILQ_INSERT_TAIL(&bdrv_states
, bs
, device_list
);
343 BlockDriverState
*bdrv_new(void)
345 BlockDriverState
*bs
;
348 bs
= g_new0(BlockDriverState
, 1);
349 QLIST_INIT(&bs
->dirty_bitmaps
);
350 for (i
= 0; i
< BLOCK_OP_TYPE_MAX
; i
++) {
351 QLIST_INIT(&bs
->op_blockers
[i
]);
353 bdrv_iostatus_disable(bs
);
354 notifier_list_init(&bs
->close_notifiers
);
355 notifier_with_return_list_init(&bs
->before_write_notifiers
);
356 qemu_co_queue_init(&bs
->throttled_reqs
[0]);
357 qemu_co_queue_init(&bs
->throttled_reqs
[1]);
359 bs
->aio_context
= qemu_get_aio_context();
364 void bdrv_add_close_notifier(BlockDriverState
*bs
, Notifier
*notify
)
366 notifier_list_add(&bs
->close_notifiers
, notify
);
369 BlockDriver
*bdrv_find_format(const char *format_name
)
372 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
373 if (!strcmp(drv1
->format_name
, format_name
)) {
380 static int bdrv_is_whitelisted(BlockDriver
*drv
, bool read_only
)
382 static const char *whitelist_rw
[] = {
383 CONFIG_BDRV_RW_WHITELIST
385 static const char *whitelist_ro
[] = {
386 CONFIG_BDRV_RO_WHITELIST
390 if (!whitelist_rw
[0] && !whitelist_ro
[0]) {
391 return 1; /* no whitelist, anything goes */
394 for (p
= whitelist_rw
; *p
; p
++) {
395 if (!strcmp(drv
->format_name
, *p
)) {
400 for (p
= whitelist_ro
; *p
; p
++) {
401 if (!strcmp(drv
->format_name
, *p
)) {
409 BlockDriver
*bdrv_find_whitelisted_format(const char *format_name
,
412 BlockDriver
*drv
= bdrv_find_format(format_name
);
413 return drv
&& bdrv_is_whitelisted(drv
, read_only
) ? drv
: NULL
;
416 typedef struct CreateCo
{
424 static void coroutine_fn
bdrv_create_co_entry(void *opaque
)
426 Error
*local_err
= NULL
;
429 CreateCo
*cco
= opaque
;
432 ret
= cco
->drv
->bdrv_create(cco
->filename
, cco
->opts
, &local_err
);
434 error_propagate(&cco
->err
, local_err
);
439 int bdrv_create(BlockDriver
*drv
, const char* filename
,
440 QemuOpts
*opts
, Error
**errp
)
447 .filename
= g_strdup(filename
),
453 if (!drv
->bdrv_create
) {
454 error_setg(errp
, "Driver '%s' does not support image creation", drv
->format_name
);
459 if (qemu_in_coroutine()) {
460 /* Fast-path if already in coroutine context */
461 bdrv_create_co_entry(&cco
);
463 co
= qemu_coroutine_create(bdrv_create_co_entry
);
464 qemu_coroutine_enter(co
, &cco
);
465 while (cco
.ret
== NOT_DONE
) {
466 aio_poll(qemu_get_aio_context(), true);
473 error_propagate(errp
, cco
.err
);
475 error_setg_errno(errp
, -ret
, "Could not create image");
480 g_free(cco
.filename
);
484 int bdrv_create_file(const char *filename
, QemuOpts
*opts
, Error
**errp
)
487 Error
*local_err
= NULL
;
490 drv
= bdrv_find_protocol(filename
, true);
492 error_setg(errp
, "Could not find protocol for file '%s'", filename
);
496 ret
= bdrv_create(drv
, filename
, opts
, &local_err
);
498 error_propagate(errp
, local_err
);
503 void bdrv_refresh_limits(BlockDriverState
*bs
, Error
**errp
)
505 BlockDriver
*drv
= bs
->drv
;
506 Error
*local_err
= NULL
;
508 memset(&bs
->bl
, 0, sizeof(bs
->bl
));
514 /* Take some limits from the children as a default */
516 bdrv_refresh_limits(bs
->file
, &local_err
);
518 error_propagate(errp
, local_err
);
521 bs
->bl
.opt_transfer_length
= bs
->file
->bl
.opt_transfer_length
;
522 bs
->bl
.max_transfer_length
= bs
->file
->bl
.max_transfer_length
;
523 bs
->bl
.opt_mem_alignment
= bs
->file
->bl
.opt_mem_alignment
;
525 bs
->bl
.opt_mem_alignment
= 512;
528 if (bs
->backing_hd
) {
529 bdrv_refresh_limits(bs
->backing_hd
, &local_err
);
531 error_propagate(errp
, local_err
);
534 bs
->bl
.opt_transfer_length
=
535 MAX(bs
->bl
.opt_transfer_length
,
536 bs
->backing_hd
->bl
.opt_transfer_length
);
537 bs
->bl
.max_transfer_length
=
538 MIN_NON_ZERO(bs
->bl
.max_transfer_length
,
539 bs
->backing_hd
->bl
.max_transfer_length
);
540 bs
->bl
.opt_mem_alignment
=
541 MAX(bs
->bl
.opt_mem_alignment
,
542 bs
->backing_hd
->bl
.opt_mem_alignment
);
545 /* Then let the driver override it */
546 if (drv
->bdrv_refresh_limits
) {
547 drv
->bdrv_refresh_limits(bs
, errp
);
552 * Create a uniquely-named empty temporary file.
553 * Return 0 upon success, otherwise a negative errno value.
555 int get_tmp_filename(char *filename
, int size
)
558 char temp_dir
[MAX_PATH
];
559 /* GetTempFileName requires that its output buffer (4th param)
560 have length MAX_PATH or greater. */
561 assert(size
>= MAX_PATH
);
562 return (GetTempPath(MAX_PATH
, temp_dir
)
563 && GetTempFileName(temp_dir
, "qem", 0, filename
)
564 ? 0 : -GetLastError());
568 tmpdir
= getenv("TMPDIR");
572 if (snprintf(filename
, size
, "%s/vl.XXXXXX", tmpdir
) >= size
) {
575 fd
= mkstemp(filename
);
579 if (close(fd
) != 0) {
588 * Detect host devices. By convention, /dev/cdrom[N] is always
589 * recognized as a host CDROM.
591 static BlockDriver
*find_hdev_driver(const char *filename
)
593 int score_max
= 0, score
;
594 BlockDriver
*drv
= NULL
, *d
;
596 QLIST_FOREACH(d
, &bdrv_drivers
, list
) {
597 if (d
->bdrv_probe_device
) {
598 score
= d
->bdrv_probe_device(filename
);
599 if (score
> score_max
) {
609 BlockDriver
*bdrv_find_protocol(const char *filename
,
610 bool allow_protocol_prefix
)
617 /* TODO Drivers without bdrv_file_open must be specified explicitly */
620 * XXX(hch): we really should not let host device detection
621 * override an explicit protocol specification, but moving this
622 * later breaks access to device names with colons in them.
623 * Thanks to the brain-dead persistent naming schemes on udev-
624 * based Linux systems those actually are quite common.
626 drv1
= find_hdev_driver(filename
);
631 if (!path_has_protocol(filename
) || !allow_protocol_prefix
) {
635 p
= strchr(filename
, ':');
638 if (len
> sizeof(protocol
) - 1)
639 len
= sizeof(protocol
) - 1;
640 memcpy(protocol
, filename
, len
);
641 protocol
[len
] = '\0';
642 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
643 if (drv1
->protocol_name
&&
644 !strcmp(drv1
->protocol_name
, protocol
)) {
651 static int find_image_format(BlockDriverState
*bs
, const char *filename
,
652 BlockDriver
**pdrv
, Error
**errp
)
654 int score
, score_max
;
655 BlockDriver
*drv1
, *drv
;
659 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
660 if (bs
->sg
|| !bdrv_is_inserted(bs
) || bdrv_getlength(bs
) == 0) {
665 ret
= bdrv_pread(bs
, 0, buf
, sizeof(buf
));
667 error_setg_errno(errp
, -ret
, "Could not read image for determining its "
675 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
676 if (drv1
->bdrv_probe
) {
677 score
= drv1
->bdrv_probe(buf
, ret
, filename
);
678 if (score
> score_max
) {
685 error_setg(errp
, "Could not determine image format: No compatible "
694 * Set the current 'total_sectors' value
695 * Return 0 on success, -errno on error.
697 static int refresh_total_sectors(BlockDriverState
*bs
, int64_t hint
)
699 BlockDriver
*drv
= bs
->drv
;
701 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
705 /* query actual device if possible, otherwise just trust the hint */
706 if (drv
->bdrv_getlength
) {
707 int64_t length
= drv
->bdrv_getlength(bs
);
711 hint
= DIV_ROUND_UP(length
, BDRV_SECTOR_SIZE
);
714 bs
->total_sectors
= hint
;
719 * Set open flags for a given discard mode
721 * Return 0 on success, -1 if the discard mode was invalid.
723 int bdrv_parse_discard_flags(const char *mode
, int *flags
)
725 *flags
&= ~BDRV_O_UNMAP
;
727 if (!strcmp(mode
, "off") || !strcmp(mode
, "ignore")) {
729 } else if (!strcmp(mode
, "on") || !strcmp(mode
, "unmap")) {
730 *flags
|= BDRV_O_UNMAP
;
739 * Set open flags for a given cache mode
741 * Return 0 on success, -1 if the cache mode was invalid.
743 int bdrv_parse_cache_flags(const char *mode
, int *flags
)
745 *flags
&= ~BDRV_O_CACHE_MASK
;
747 if (!strcmp(mode
, "off") || !strcmp(mode
, "none")) {
748 *flags
|= BDRV_O_NOCACHE
| BDRV_O_CACHE_WB
;
749 } else if (!strcmp(mode
, "directsync")) {
750 *flags
|= BDRV_O_NOCACHE
;
751 } else if (!strcmp(mode
, "writeback")) {
752 *flags
|= BDRV_O_CACHE_WB
;
753 } else if (!strcmp(mode
, "unsafe")) {
754 *flags
|= BDRV_O_CACHE_WB
;
755 *flags
|= BDRV_O_NO_FLUSH
;
756 } else if (!strcmp(mode
, "writethrough")) {
757 /* this is the default */
766 * The copy-on-read flag is actually a reference count so multiple users may
767 * use the feature without worrying about clobbering its previous state.
768 * Copy-on-read stays enabled until all users have called to disable it.
770 void bdrv_enable_copy_on_read(BlockDriverState
*bs
)
775 void bdrv_disable_copy_on_read(BlockDriverState
*bs
)
777 assert(bs
->copy_on_read
> 0);
782 * Returns the flags that a temporary snapshot should get, based on the
783 * originally requested flags (the originally requested image will have flags
784 * like a backing file)
786 static int bdrv_temp_snapshot_flags(int flags
)
788 return (flags
& ~BDRV_O_SNAPSHOT
) | BDRV_O_TEMPORARY
;
792 * Returns the flags that bs->file should get, based on the given flags for
795 static int bdrv_inherited_flags(int flags
)
797 /* Enable protocol handling, disable format probing for bs->file */
798 flags
|= BDRV_O_PROTOCOL
;
800 /* Our block drivers take care to send flushes and respect unmap policy,
801 * so we can enable both unconditionally on lower layers. */
802 flags
|= BDRV_O_CACHE_WB
| BDRV_O_UNMAP
;
804 /* Clear flags that only apply to the top layer */
805 flags
&= ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
| BDRV_O_COPY_ON_READ
);
811 * Returns the flags that bs->backing_hd should get, based on the given flags
814 static int bdrv_backing_flags(int flags
)
816 /* backing files always opened read-only */
817 flags
&= ~(BDRV_O_RDWR
| BDRV_O_COPY_ON_READ
);
819 /* snapshot=on is handled on the top layer */
820 flags
&= ~(BDRV_O_SNAPSHOT
| BDRV_O_TEMPORARY
);
825 static int bdrv_open_flags(BlockDriverState
*bs
, int flags
)
827 int open_flags
= flags
| BDRV_O_CACHE_WB
;
830 * Clear flags that are internal to the block layer before opening the
833 open_flags
&= ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
| BDRV_O_PROTOCOL
);
836 * Snapshots should be writable.
838 if (flags
& BDRV_O_TEMPORARY
) {
839 open_flags
|= BDRV_O_RDWR
;
845 static void bdrv_assign_node_name(BlockDriverState
*bs
,
846 const char *node_name
,
853 /* Check for empty string or invalid characters */
854 if (!id_wellformed(node_name
)) {
855 error_setg(errp
, "Invalid node name");
859 /* takes care of avoiding namespaces collisions */
860 if (blk_by_name(node_name
)) {
861 error_setg(errp
, "node-name=%s is conflicting with a device id",
866 /* takes care of avoiding duplicates node names */
867 if (bdrv_find_node(node_name
)) {
868 error_setg(errp
, "Duplicate node name");
872 /* copy node name into the bs and insert it into the graph list */
873 pstrcpy(bs
->node_name
, sizeof(bs
->node_name
), node_name
);
874 QTAILQ_INSERT_TAIL(&graph_bdrv_states
, bs
, node_list
);
878 * Common part for opening disk images and files
880 * Removes all processed options from *options.
882 static int bdrv_open_common(BlockDriverState
*bs
, BlockDriverState
*file
,
883 QDict
*options
, int flags
, BlockDriver
*drv
, Error
**errp
)
886 const char *filename
;
887 const char *node_name
= NULL
;
888 Error
*local_err
= NULL
;
891 assert(bs
->file
== NULL
);
892 assert(options
!= NULL
&& bs
->options
!= options
);
895 filename
= file
->filename
;
897 filename
= qdict_get_try_str(options
, "filename");
900 if (drv
->bdrv_needs_filename
&& !filename
) {
901 error_setg(errp
, "The '%s' block driver requires a file name",
906 trace_bdrv_open_common(bs
, filename
?: "", flags
, drv
->format_name
);
908 node_name
= qdict_get_try_str(options
, "node-name");
909 bdrv_assign_node_name(bs
, node_name
, &local_err
);
911 error_propagate(errp
, local_err
);
914 qdict_del(options
, "node-name");
916 /* bdrv_open() with directly using a protocol as drv. This layer is already
917 * opened, so assign it to bs (while file becomes a closed BlockDriverState)
918 * and return immediately. */
919 if (file
!= NULL
&& drv
->bdrv_file_open
) {
924 bs
->open_flags
= flags
;
925 bs
->guest_block_size
= 512;
926 bs
->request_alignment
= 512;
927 bs
->zero_beyond_eof
= true;
928 open_flags
= bdrv_open_flags(bs
, flags
);
929 bs
->read_only
= !(open_flags
& BDRV_O_RDWR
);
930 bs
->growable
= !!(flags
& BDRV_O_PROTOCOL
);
932 if (use_bdrv_whitelist
&& !bdrv_is_whitelisted(drv
, bs
->read_only
)) {
934 !bs
->read_only
&& bdrv_is_whitelisted(drv
, true)
935 ? "Driver '%s' can only be used for read-only devices"
936 : "Driver '%s' is not whitelisted",
941 assert(bs
->copy_on_read
== 0); /* bdrv_new() and bdrv_close() make it so */
942 if (flags
& BDRV_O_COPY_ON_READ
) {
943 if (!bs
->read_only
) {
944 bdrv_enable_copy_on_read(bs
);
946 error_setg(errp
, "Can't use copy-on-read on read-only device");
951 if (filename
!= NULL
) {
952 pstrcpy(bs
->filename
, sizeof(bs
->filename
), filename
);
954 bs
->filename
[0] = '\0';
956 pstrcpy(bs
->exact_filename
, sizeof(bs
->exact_filename
), bs
->filename
);
959 bs
->opaque
= g_malloc0(drv
->instance_size
);
961 bs
->enable_write_cache
= !!(flags
& BDRV_O_CACHE_WB
);
963 /* Open the image, either directly or using a protocol */
964 if (drv
->bdrv_file_open
) {
965 assert(file
== NULL
);
966 assert(!drv
->bdrv_needs_filename
|| filename
!= NULL
);
967 ret
= drv
->bdrv_file_open(bs
, options
, open_flags
, &local_err
);
970 error_setg(errp
, "Can't use '%s' as a block driver for the "
971 "protocol level", drv
->format_name
);
976 ret
= drv
->bdrv_open(bs
, options
, open_flags
, &local_err
);
981 error_propagate(errp
, local_err
);
982 } else if (bs
->filename
[0]) {
983 error_setg_errno(errp
, -ret
, "Could not open '%s'", bs
->filename
);
985 error_setg_errno(errp
, -ret
, "Could not open image");
990 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
992 error_setg_errno(errp
, -ret
, "Could not refresh total sector count");
996 bdrv_refresh_limits(bs
, &local_err
);
998 error_propagate(errp
, local_err
);
1003 assert(bdrv_opt_mem_align(bs
) != 0);
1004 assert((bs
->request_alignment
!= 0) || bs
->sg
);
1015 static QDict
*parse_json_filename(const char *filename
, Error
**errp
)
1017 QObject
*options_obj
;
1021 ret
= strstart(filename
, "json:", &filename
);
1024 options_obj
= qobject_from_json(filename
);
1026 error_setg(errp
, "Could not parse the JSON options");
1030 if (qobject_type(options_obj
) != QTYPE_QDICT
) {
1031 qobject_decref(options_obj
);
1032 error_setg(errp
, "Invalid JSON object given");
1036 options
= qobject_to_qdict(options_obj
);
1037 qdict_flatten(options
);
1043 * Fills in default options for opening images and converts the legacy
1044 * filename/flags pair to option QDict entries.
1046 static int bdrv_fill_options(QDict
**options
, const char **pfilename
, int flags
,
1047 BlockDriver
*drv
, Error
**errp
)
1049 const char *filename
= *pfilename
;
1050 const char *drvname
;
1051 bool protocol
= flags
& BDRV_O_PROTOCOL
;
1052 bool parse_filename
= false;
1053 Error
*local_err
= NULL
;
1055 /* Parse json: pseudo-protocol */
1056 if (filename
&& g_str_has_prefix(filename
, "json:")) {
1057 QDict
*json_options
= parse_json_filename(filename
, &local_err
);
1059 error_propagate(errp
, local_err
);
1063 /* Options given in the filename have lower priority than options
1064 * specified directly */
1065 qdict_join(*options
, json_options
, false);
1066 QDECREF(json_options
);
1067 *pfilename
= filename
= NULL
;
1070 /* Fetch the file name from the options QDict if necessary */
1071 if (protocol
&& filename
) {
1072 if (!qdict_haskey(*options
, "filename")) {
1073 qdict_put(*options
, "filename", qstring_from_str(filename
));
1074 parse_filename
= true;
1076 error_setg(errp
, "Can't specify 'file' and 'filename' options at "
1082 /* Find the right block driver */
1083 filename
= qdict_get_try_str(*options
, "filename");
1084 drvname
= qdict_get_try_str(*options
, "driver");
1088 error_setg(errp
, "Driver specified twice");
1091 drvname
= drv
->format_name
;
1092 qdict_put(*options
, "driver", qstring_from_str(drvname
));
1094 if (!drvname
&& protocol
) {
1096 drv
= bdrv_find_protocol(filename
, parse_filename
);
1098 error_setg(errp
, "Unknown protocol");
1102 drvname
= drv
->format_name
;
1103 qdict_put(*options
, "driver", qstring_from_str(drvname
));
1105 error_setg(errp
, "Must specify either driver or file");
1108 } else if (drvname
) {
1109 drv
= bdrv_find_format(drvname
);
1111 error_setg(errp
, "Unknown driver '%s'", drvname
);
1117 assert(drv
|| !protocol
);
1119 /* Driver-specific filename parsing */
1120 if (drv
&& drv
->bdrv_parse_filename
&& parse_filename
) {
1121 drv
->bdrv_parse_filename(filename
, *options
, &local_err
);
1123 error_propagate(errp
, local_err
);
1127 if (!drv
->bdrv_needs_filename
) {
1128 qdict_del(*options
, "filename");
1135 void bdrv_set_backing_hd(BlockDriverState
*bs
, BlockDriverState
*backing_hd
)
1138 if (bs
->backing_hd
) {
1139 assert(bs
->backing_blocker
);
1140 bdrv_op_unblock_all(bs
->backing_hd
, bs
->backing_blocker
);
1141 } else if (backing_hd
) {
1142 error_setg(&bs
->backing_blocker
,
1143 "device is used as backing hd of '%s'",
1144 bdrv_get_device_name(bs
));
1147 bs
->backing_hd
= backing_hd
;
1149 error_free(bs
->backing_blocker
);
1150 bs
->backing_blocker
= NULL
;
1153 bs
->open_flags
&= ~BDRV_O_NO_BACKING
;
1154 pstrcpy(bs
->backing_file
, sizeof(bs
->backing_file
), backing_hd
->filename
);
1155 pstrcpy(bs
->backing_format
, sizeof(bs
->backing_format
),
1156 backing_hd
->drv
? backing_hd
->drv
->format_name
: "");
1158 bdrv_op_block_all(bs
->backing_hd
, bs
->backing_blocker
);
1159 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1160 bdrv_op_unblock(bs
->backing_hd
, BLOCK_OP_TYPE_COMMIT
,
1161 bs
->backing_blocker
);
1163 bdrv_refresh_limits(bs
, NULL
);
1167 * Opens the backing file for a BlockDriverState if not yet open
1169 * options is a QDict of options to pass to the block drivers, or NULL for an
1170 * empty set of options. The reference to the QDict is transferred to this
1171 * function (even on failure), so if the caller intends to reuse the dictionary,
1172 * it needs to use QINCREF() before calling bdrv_file_open.
1174 int bdrv_open_backing_file(BlockDriverState
*bs
, QDict
*options
, Error
**errp
)
1176 char *backing_filename
= g_malloc0(PATH_MAX
);
1178 BlockDriver
*back_drv
= NULL
;
1179 BlockDriverState
*backing_hd
;
1180 Error
*local_err
= NULL
;
1182 if (bs
->backing_hd
!= NULL
) {
1187 /* NULL means an empty set of options */
1188 if (options
== NULL
) {
1189 options
= qdict_new();
1192 bs
->open_flags
&= ~BDRV_O_NO_BACKING
;
1193 if (qdict_haskey(options
, "file.filename")) {
1194 backing_filename
[0] = '\0';
1195 } else if (bs
->backing_file
[0] == '\0' && qdict_size(options
) == 0) {
1199 bdrv_get_full_backing_filename(bs
, backing_filename
, PATH_MAX
);
1202 if (!bs
->drv
|| !bs
->drv
->supports_backing
) {
1204 error_setg(errp
, "Driver doesn't support backing files");
1209 backing_hd
= bdrv_new();
1211 if (bs
->backing_format
[0] != '\0') {
1212 back_drv
= bdrv_find_format(bs
->backing_format
);
1215 assert(bs
->backing_hd
== NULL
);
1216 ret
= bdrv_open(&backing_hd
,
1217 *backing_filename
? backing_filename
: NULL
, NULL
, options
,
1218 bdrv_backing_flags(bs
->open_flags
), back_drv
, &local_err
);
1220 bdrv_unref(backing_hd
);
1222 bs
->open_flags
|= BDRV_O_NO_BACKING
;
1223 error_setg(errp
, "Could not open backing file: %s",
1224 error_get_pretty(local_err
));
1225 error_free(local_err
);
1228 bdrv_set_backing_hd(bs
, backing_hd
);
1231 g_free(backing_filename
);
1236 * Opens a disk image whose options are given as BlockdevRef in another block
1239 * If allow_none is true, no image will be opened if filename is false and no
1240 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
1242 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1243 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1244 * itself, all options starting with "${bdref_key}." are considered part of the
1247 * The BlockdevRef will be removed from the options QDict.
1249 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
1251 int bdrv_open_image(BlockDriverState
**pbs
, const char *filename
,
1252 QDict
*options
, const char *bdref_key
, int flags
,
1253 bool allow_none
, Error
**errp
)
1255 QDict
*image_options
;
1257 char *bdref_key_dot
;
1258 const char *reference
;
1261 assert(*pbs
== NULL
);
1263 bdref_key_dot
= g_strdup_printf("%s.", bdref_key
);
1264 qdict_extract_subqdict(options
, &image_options
, bdref_key_dot
);
1265 g_free(bdref_key_dot
);
1267 reference
= qdict_get_try_str(options
, bdref_key
);
1268 if (!filename
&& !reference
&& !qdict_size(image_options
)) {
1272 error_setg(errp
, "A block device must be specified for \"%s\"",
1276 QDECREF(image_options
);
1280 ret
= bdrv_open(pbs
, filename
, reference
, image_options
, flags
, NULL
, errp
);
1283 qdict_del(options
, bdref_key
);
1287 int bdrv_append_temp_snapshot(BlockDriverState
*bs
, int flags
, Error
**errp
)
1289 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1290 char *tmp_filename
= g_malloc0(PATH_MAX
+ 1);
1292 QemuOpts
*opts
= NULL
;
1293 QDict
*snapshot_options
;
1294 BlockDriverState
*bs_snapshot
;
1298 /* if snapshot, we create a temporary backing file and open it
1299 instead of opening 'filename' directly */
1301 /* Get the required size from the image */
1302 total_size
= bdrv_getlength(bs
);
1303 if (total_size
< 0) {
1305 error_setg_errno(errp
, -total_size
, "Could not get image size");
1309 /* Create the temporary image */
1310 ret
= get_tmp_filename(tmp_filename
, PATH_MAX
+ 1);
1312 error_setg_errno(errp
, -ret
, "Could not get temporary filename");
1316 opts
= qemu_opts_create(bdrv_qcow2
.create_opts
, NULL
, 0,
1318 qemu_opt_set_number(opts
, BLOCK_OPT_SIZE
, total_size
);
1319 ret
= bdrv_create(&bdrv_qcow2
, tmp_filename
, opts
, &local_err
);
1320 qemu_opts_del(opts
);
1322 error_setg_errno(errp
, -ret
, "Could not create temporary overlay "
1323 "'%s': %s", tmp_filename
,
1324 error_get_pretty(local_err
));
1325 error_free(local_err
);
1329 /* Prepare a new options QDict for the temporary file */
1330 snapshot_options
= qdict_new();
1331 qdict_put(snapshot_options
, "file.driver",
1332 qstring_from_str("file"));
1333 qdict_put(snapshot_options
, "file.filename",
1334 qstring_from_str(tmp_filename
));
1336 bs_snapshot
= bdrv_new();
1338 ret
= bdrv_open(&bs_snapshot
, NULL
, NULL
, snapshot_options
,
1339 flags
, &bdrv_qcow2
, &local_err
);
1341 error_propagate(errp
, local_err
);
1345 bdrv_append(bs_snapshot
, bs
);
1348 g_free(tmp_filename
);
1353 * Opens a disk image (raw, qcow2, vmdk, ...)
1355 * options is a QDict of options to pass to the block drivers, or NULL for an
1356 * empty set of options. The reference to the QDict belongs to the block layer
1357 * after the call (even on failure), so if the caller intends to reuse the
1358 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1360 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1361 * If it is not NULL, the referenced BDS will be reused.
1363 * The reference parameter may be used to specify an existing block device which
1364 * should be opened. If specified, neither options nor a filename may be given,
1365 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1367 int bdrv_open(BlockDriverState
**pbs
, const char *filename
,
1368 const char *reference
, QDict
*options
, int flags
,
1369 BlockDriver
*drv
, Error
**errp
)
1372 BlockDriverState
*file
= NULL
, *bs
;
1373 const char *drvname
;
1374 Error
*local_err
= NULL
;
1375 int snapshot_flags
= 0;
1380 bool options_non_empty
= options
? qdict_size(options
) : false;
1384 error_setg(errp
, "Cannot reuse an existing BDS when referencing "
1385 "another block device");
1389 if (filename
|| options_non_empty
) {
1390 error_setg(errp
, "Cannot reference an existing block device with "
1391 "additional options or a new filename");
1395 bs
= bdrv_lookup_bs(reference
, reference
, errp
);
1410 /* NULL means an empty set of options */
1411 if (options
== NULL
) {
1412 options
= qdict_new();
1415 ret
= bdrv_fill_options(&options
, &filename
, flags
, drv
, &local_err
);
1420 /* Find the right image format driver */
1422 drvname
= qdict_get_try_str(options
, "driver");
1424 drv
= bdrv_find_format(drvname
);
1425 qdict_del(options
, "driver");
1427 error_setg(errp
, "Unknown driver: '%s'", drvname
);
1433 assert(drvname
|| !(flags
& BDRV_O_PROTOCOL
));
1434 if (drv
&& !drv
->bdrv_file_open
) {
1435 /* If the user explicitly wants a format driver here, we'll need to add
1436 * another layer for the protocol in bs->file */
1437 flags
&= ~BDRV_O_PROTOCOL
;
1440 bs
->options
= options
;
1441 options
= qdict_clone_shallow(options
);
1443 /* Open image file without format layer */
1444 if ((flags
& BDRV_O_PROTOCOL
) == 0) {
1445 if (flags
& BDRV_O_RDWR
) {
1446 flags
|= BDRV_O_ALLOW_RDWR
;
1448 if (flags
& BDRV_O_SNAPSHOT
) {
1449 snapshot_flags
= bdrv_temp_snapshot_flags(flags
);
1450 flags
= bdrv_backing_flags(flags
);
1453 assert(file
== NULL
);
1454 ret
= bdrv_open_image(&file
, filename
, options
, "file",
1455 bdrv_inherited_flags(flags
),
1462 /* Image format probing */
1464 ret
= find_image_format(file
, filename
, &drv
, &local_err
);
1469 error_setg(errp
, "Must specify either driver or file");
1474 /* Open the image */
1475 ret
= bdrv_open_common(bs
, file
, options
, flags
, drv
, &local_err
);
1480 if (file
&& (bs
->file
!= file
)) {
1485 /* If there is a backing file, use it */
1486 if ((flags
& BDRV_O_NO_BACKING
) == 0) {
1487 QDict
*backing_options
;
1489 qdict_extract_subqdict(options
, &backing_options
, "backing.");
1490 ret
= bdrv_open_backing_file(bs
, backing_options
, &local_err
);
1492 goto close_and_fail
;
1496 bdrv_refresh_filename(bs
);
1498 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1499 * temporary snapshot afterwards. */
1500 if (snapshot_flags
) {
1501 ret
= bdrv_append_temp_snapshot(bs
, snapshot_flags
, &local_err
);
1503 goto close_and_fail
;
1507 /* Check if any unknown options were used */
1508 if (options
&& (qdict_size(options
) != 0)) {
1509 const QDictEntry
*entry
= qdict_first(options
);
1510 if (flags
& BDRV_O_PROTOCOL
) {
1511 error_setg(errp
, "Block protocol '%s' doesn't support the option "
1512 "'%s'", drv
->format_name
, entry
->key
);
1514 error_setg(errp
, "Block format '%s' used by device '%s' doesn't "
1515 "support the option '%s'", drv
->format_name
,
1516 bdrv_get_device_name(bs
), entry
->key
);
1520 goto close_and_fail
;
1523 if (!bdrv_key_required(bs
)) {
1525 blk_dev_change_media_cb(bs
->blk
, true);
1527 } else if (!runstate_check(RUN_STATE_PRELAUNCH
)
1528 && !runstate_check(RUN_STATE_INMIGRATE
)
1529 && !runstate_check(RUN_STATE_PAUSED
)) { /* HACK */
1531 "Guest must be stopped for opening of encrypted image");
1533 goto close_and_fail
;
1544 QDECREF(bs
->options
);
1548 /* If *pbs is NULL, a new BDS has been created in this function and
1549 needs to be freed now. Otherwise, it does not need to be closed,
1550 since it has not really been opened yet. */
1554 error_propagate(errp
, local_err
);
1559 /* See fail path, but now the BDS has to be always closed */
1567 error_propagate(errp
, local_err
);
1572 typedef struct BlockReopenQueueEntry
{
1574 BDRVReopenState state
;
1575 QSIMPLEQ_ENTRY(BlockReopenQueueEntry
) entry
;
1576 } BlockReopenQueueEntry
;
1579 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1580 * reopen of multiple devices.
1582 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1583 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1584 * be created and initialized. This newly created BlockReopenQueue should be
1585 * passed back in for subsequent calls that are intended to be of the same
1588 * bs is the BlockDriverState to add to the reopen queue.
1590 * flags contains the open flags for the associated bs
1592 * returns a pointer to bs_queue, which is either the newly allocated
1593 * bs_queue, or the existing bs_queue being used.
1596 BlockReopenQueue
*bdrv_reopen_queue(BlockReopenQueue
*bs_queue
,
1597 BlockDriverState
*bs
, int flags
)
1601 BlockReopenQueueEntry
*bs_entry
;
1602 if (bs_queue
== NULL
) {
1603 bs_queue
= g_new0(BlockReopenQueue
, 1);
1604 QSIMPLEQ_INIT(bs_queue
);
1607 /* bdrv_open() masks this flag out */
1608 flags
&= ~BDRV_O_PROTOCOL
;
1611 bdrv_reopen_queue(bs_queue
, bs
->file
, bdrv_inherited_flags(flags
));
1614 bs_entry
= g_new0(BlockReopenQueueEntry
, 1);
1615 QSIMPLEQ_INSERT_TAIL(bs_queue
, bs_entry
, entry
);
1617 bs_entry
->state
.bs
= bs
;
1618 bs_entry
->state
.flags
= flags
;
1624 * Reopen multiple BlockDriverStates atomically & transactionally.
1626 * The queue passed in (bs_queue) must have been built up previous
1627 * via bdrv_reopen_queue().
1629 * Reopens all BDS specified in the queue, with the appropriate
1630 * flags. All devices are prepared for reopen, and failure of any
1631 * device will cause all device changes to be abandonded, and intermediate
1634 * If all devices prepare successfully, then the changes are committed
1638 int bdrv_reopen_multiple(BlockReopenQueue
*bs_queue
, Error
**errp
)
1641 BlockReopenQueueEntry
*bs_entry
, *next
;
1642 Error
*local_err
= NULL
;
1644 assert(bs_queue
!= NULL
);
1648 QSIMPLEQ_FOREACH(bs_entry
, bs_queue
, entry
) {
1649 if (bdrv_reopen_prepare(&bs_entry
->state
, bs_queue
, &local_err
)) {
1650 error_propagate(errp
, local_err
);
1653 bs_entry
->prepared
= true;
1656 /* If we reach this point, we have success and just need to apply the
1659 QSIMPLEQ_FOREACH(bs_entry
, bs_queue
, entry
) {
1660 bdrv_reopen_commit(&bs_entry
->state
);
1666 QSIMPLEQ_FOREACH_SAFE(bs_entry
, bs_queue
, entry
, next
) {
1667 if (ret
&& bs_entry
->prepared
) {
1668 bdrv_reopen_abort(&bs_entry
->state
);
1677 /* Reopen a single BlockDriverState with the specified flags. */
1678 int bdrv_reopen(BlockDriverState
*bs
, int bdrv_flags
, Error
**errp
)
1681 Error
*local_err
= NULL
;
1682 BlockReopenQueue
*queue
= bdrv_reopen_queue(NULL
, bs
, bdrv_flags
);
1684 ret
= bdrv_reopen_multiple(queue
, &local_err
);
1685 if (local_err
!= NULL
) {
1686 error_propagate(errp
, local_err
);
1693 * Prepares a BlockDriverState for reopen. All changes are staged in the
1694 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1695 * the block driver layer .bdrv_reopen_prepare()
1697 * bs is the BlockDriverState to reopen
1698 * flags are the new open flags
1699 * queue is the reopen queue
1701 * Returns 0 on success, non-zero on error. On error errp will be set
1704 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1705 * It is the responsibility of the caller to then call the abort() or
1706 * commit() for any other BDS that have been left in a prepare() state
1709 int bdrv_reopen_prepare(BDRVReopenState
*reopen_state
, BlockReopenQueue
*queue
,
1713 Error
*local_err
= NULL
;
1716 assert(reopen_state
!= NULL
);
1717 assert(reopen_state
->bs
->drv
!= NULL
);
1718 drv
= reopen_state
->bs
->drv
;
1720 /* if we are to stay read-only, do not allow permission change
1722 if (!(reopen_state
->bs
->open_flags
& BDRV_O_ALLOW_RDWR
) &&
1723 reopen_state
->flags
& BDRV_O_RDWR
) {
1724 error_set(errp
, QERR_DEVICE_IS_READ_ONLY
,
1725 bdrv_get_device_name(reopen_state
->bs
));
1730 ret
= bdrv_flush(reopen_state
->bs
);
1732 error_set(errp
, ERROR_CLASS_GENERIC_ERROR
, "Error (%s) flushing drive",
1737 if (drv
->bdrv_reopen_prepare
) {
1738 ret
= drv
->bdrv_reopen_prepare(reopen_state
, queue
, &local_err
);
1740 if (local_err
!= NULL
) {
1741 error_propagate(errp
, local_err
);
1743 error_setg(errp
, "failed while preparing to reopen image '%s'",
1744 reopen_state
->bs
->filename
);
1749 /* It is currently mandatory to have a bdrv_reopen_prepare()
1750 * handler for each supported drv. */
1751 error_set(errp
, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED
,
1752 drv
->format_name
, bdrv_get_device_name(reopen_state
->bs
),
1753 "reopening of file");
1765 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
1766 * makes them final by swapping the staging BlockDriverState contents into
1767 * the active BlockDriverState contents.
1769 void bdrv_reopen_commit(BDRVReopenState
*reopen_state
)
1773 assert(reopen_state
!= NULL
);
1774 drv
= reopen_state
->bs
->drv
;
1775 assert(drv
!= NULL
);
1777 /* If there are any driver level actions to take */
1778 if (drv
->bdrv_reopen_commit
) {
1779 drv
->bdrv_reopen_commit(reopen_state
);
1782 /* set BDS specific flags now */
1783 reopen_state
->bs
->open_flags
= reopen_state
->flags
;
1784 reopen_state
->bs
->enable_write_cache
= !!(reopen_state
->flags
&
1786 reopen_state
->bs
->read_only
= !(reopen_state
->flags
& BDRV_O_RDWR
);
1788 bdrv_refresh_limits(reopen_state
->bs
, NULL
);
1792 * Abort the reopen, and delete and free the staged changes in
1795 void bdrv_reopen_abort(BDRVReopenState
*reopen_state
)
1799 assert(reopen_state
!= NULL
);
1800 drv
= reopen_state
->bs
->drv
;
1801 assert(drv
!= NULL
);
1803 if (drv
->bdrv_reopen_abort
) {
1804 drv
->bdrv_reopen_abort(reopen_state
);
1809 void bdrv_close(BlockDriverState
*bs
)
1811 BdrvAioNotifier
*ban
, *ban_next
;
1814 block_job_cancel_sync(bs
->job
);
1816 bdrv_drain_all(); /* complete I/O */
1818 bdrv_drain_all(); /* in case flush left pending I/O */
1819 notifier_list_notify(&bs
->close_notifiers
, bs
);
1822 if (bs
->backing_hd
) {
1823 BlockDriverState
*backing_hd
= bs
->backing_hd
;
1824 bdrv_set_backing_hd(bs
, NULL
);
1825 bdrv_unref(backing_hd
);
1827 bs
->drv
->bdrv_close(bs
);
1831 bs
->copy_on_read
= 0;
1832 bs
->backing_file
[0] = '\0';
1833 bs
->backing_format
[0] = '\0';
1834 bs
->total_sectors
= 0;
1839 bs
->zero_beyond_eof
= false;
1840 QDECREF(bs
->options
);
1842 QDECREF(bs
->full_open_options
);
1843 bs
->full_open_options
= NULL
;
1845 if (bs
->file
!= NULL
) {
1846 bdrv_unref(bs
->file
);
1852 blk_dev_change_media_cb(bs
->blk
, false);
1855 /*throttling disk I/O limits*/
1856 if (bs
->io_limits_enabled
) {
1857 bdrv_io_limits_disable(bs
);
1860 QLIST_FOREACH_SAFE(ban
, &bs
->aio_notifiers
, list
, ban_next
) {
1863 QLIST_INIT(&bs
->aio_notifiers
);
1866 void bdrv_close_all(void)
1868 BlockDriverState
*bs
;
1870 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
1871 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
1873 aio_context_acquire(aio_context
);
1875 aio_context_release(aio_context
);
1879 /* Check if any requests are in-flight (including throttled requests) */
1880 static bool bdrv_requests_pending(BlockDriverState
*bs
)
1882 if (!QLIST_EMPTY(&bs
->tracked_requests
)) {
1885 if (!qemu_co_queue_empty(&bs
->throttled_reqs
[0])) {
1888 if (!qemu_co_queue_empty(&bs
->throttled_reqs
[1])) {
1891 if (bs
->file
&& bdrv_requests_pending(bs
->file
)) {
1894 if (bs
->backing_hd
&& bdrv_requests_pending(bs
->backing_hd
)) {
1900 static bool bdrv_drain_one(BlockDriverState
*bs
)
1904 bdrv_flush_io_queue(bs
);
1905 bdrv_start_throttled_reqs(bs
);
1906 bs_busy
= bdrv_requests_pending(bs
);
1907 bs_busy
|= aio_poll(bdrv_get_aio_context(bs
), bs_busy
);
1912 * Wait for pending requests to complete on a single BlockDriverState subtree
1914 * See the warning in bdrv_drain_all(). This function can only be called if
1915 * you are sure nothing can generate I/O because you have op blockers
1918 * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
1921 void bdrv_drain(BlockDriverState
*bs
)
1923 while (bdrv_drain_one(bs
)) {
1924 /* Keep iterating */
1929 * Wait for pending requests to complete across all BlockDriverStates
1931 * This function does not flush data to disk, use bdrv_flush_all() for that
1932 * after calling this function.
1934 * Note that completion of an asynchronous I/O operation can trigger any
1935 * number of other I/O operations on other devices---for example a coroutine
1936 * can be arbitrarily complex and a constant flow of I/O can come until the
1937 * coroutine is complete. Because of this, it is not possible to have a
1938 * function to drain a single device's I/O queue.
1940 void bdrv_drain_all(void)
1942 /* Always run first iteration so any pending completion BHs run */
1944 BlockDriverState
*bs
;
1949 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
1950 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
1952 aio_context_acquire(aio_context
);
1953 busy
|= bdrv_drain_one(bs
);
1954 aio_context_release(aio_context
);
1959 /* make a BlockDriverState anonymous by removing from bdrv_state and
1960 * graph_bdrv_state list.
1961 Also, NULL terminate the device_name to prevent double remove */
1962 void bdrv_make_anon(BlockDriverState
*bs
)
1965 * Take care to remove bs from bdrv_states only when it's actually
1966 * in it. Note that bs->device_list.tqe_prev is initially null,
1967 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
1968 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
1969 * resetting it to null on remove.
1971 if (bs
->device_list
.tqe_prev
) {
1972 QTAILQ_REMOVE(&bdrv_states
, bs
, device_list
);
1973 bs
->device_list
.tqe_prev
= NULL
;
1975 if (bs
->node_name
[0] != '\0') {
1976 QTAILQ_REMOVE(&graph_bdrv_states
, bs
, node_list
);
1978 bs
->node_name
[0] = '\0';
1981 static void bdrv_rebind(BlockDriverState
*bs
)
1983 if (bs
->drv
&& bs
->drv
->bdrv_rebind
) {
1984 bs
->drv
->bdrv_rebind(bs
);
1988 static void bdrv_move_feature_fields(BlockDriverState
*bs_dest
,
1989 BlockDriverState
*bs_src
)
1991 /* move some fields that need to stay attached to the device */
1994 bs_dest
->guest_block_size
= bs_src
->guest_block_size
;
1995 bs_dest
->copy_on_read
= bs_src
->copy_on_read
;
1997 bs_dest
->enable_write_cache
= bs_src
->enable_write_cache
;
1999 /* i/o throttled req */
2000 memcpy(&bs_dest
->throttle_state
,
2001 &bs_src
->throttle_state
,
2002 sizeof(ThrottleState
));
2003 bs_dest
->throttled_reqs
[0] = bs_src
->throttled_reqs
[0];
2004 bs_dest
->throttled_reqs
[1] = bs_src
->throttled_reqs
[1];
2005 bs_dest
->io_limits_enabled
= bs_src
->io_limits_enabled
;
2008 bs_dest
->on_read_error
= bs_src
->on_read_error
;
2009 bs_dest
->on_write_error
= bs_src
->on_write_error
;
2012 bs_dest
->iostatus_enabled
= bs_src
->iostatus_enabled
;
2013 bs_dest
->iostatus
= bs_src
->iostatus
;
2016 bs_dest
->dirty_bitmaps
= bs_src
->dirty_bitmaps
;
2018 /* reference count */
2019 bs_dest
->refcnt
= bs_src
->refcnt
;
2022 bs_dest
->job
= bs_src
->job
;
2024 /* keep the same entry in bdrv_states */
2025 bs_dest
->device_list
= bs_src
->device_list
;
2026 bs_dest
->blk
= bs_src
->blk
;
2028 memcpy(bs_dest
->op_blockers
, bs_src
->op_blockers
,
2029 sizeof(bs_dest
->op_blockers
));
2033 * Swap bs contents for two image chains while they are live,
2034 * while keeping required fields on the BlockDriverState that is
2035 * actually attached to a device.
2037 * This will modify the BlockDriverState fields, and swap contents
2038 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2040 * bs_new must not be attached to a BlockBackend.
2042 * This function does not create any image files.
2044 void bdrv_swap(BlockDriverState
*bs_new
, BlockDriverState
*bs_old
)
2046 BlockDriverState tmp
;
2048 /* The code needs to swap the node_name but simply swapping node_list won't
2049 * work so first remove the nodes from the graph list, do the swap then
2050 * insert them back if needed.
2052 if (bs_new
->node_name
[0] != '\0') {
2053 QTAILQ_REMOVE(&graph_bdrv_states
, bs_new
, node_list
);
2055 if (bs_old
->node_name
[0] != '\0') {
2056 QTAILQ_REMOVE(&graph_bdrv_states
, bs_old
, node_list
);
2059 /* bs_new must be unattached and shouldn't have anything fancy enabled */
2060 assert(!bs_new
->blk
);
2061 assert(QLIST_EMPTY(&bs_new
->dirty_bitmaps
));
2062 assert(bs_new
->job
== NULL
);
2063 assert(bs_new
->io_limits_enabled
== false);
2064 assert(!throttle_have_timer(&bs_new
->throttle_state
));
2070 /* there are some fields that should not be swapped, move them back */
2071 bdrv_move_feature_fields(&tmp
, bs_old
);
2072 bdrv_move_feature_fields(bs_old
, bs_new
);
2073 bdrv_move_feature_fields(bs_new
, &tmp
);
2075 /* bs_new must remain unattached */
2076 assert(!bs_new
->blk
);
2078 /* Check a few fields that should remain attached to the device */
2079 assert(bs_new
->job
== NULL
);
2080 assert(bs_new
->io_limits_enabled
== false);
2081 assert(!throttle_have_timer(&bs_new
->throttle_state
));
2083 /* insert the nodes back into the graph node list if needed */
2084 if (bs_new
->node_name
[0] != '\0') {
2085 QTAILQ_INSERT_TAIL(&graph_bdrv_states
, bs_new
, node_list
);
2087 if (bs_old
->node_name
[0] != '\0') {
2088 QTAILQ_INSERT_TAIL(&graph_bdrv_states
, bs_old
, node_list
);
2091 bdrv_rebind(bs_new
);
2092 bdrv_rebind(bs_old
);
2096 * Add new bs contents at the top of an image chain while the chain is
2097 * live, while keeping required fields on the top layer.
2099 * This will modify the BlockDriverState fields, and swap contents
2100 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2102 * bs_new must not be attached to a BlockBackend.
2104 * This function does not create any image files.
2106 void bdrv_append(BlockDriverState
*bs_new
, BlockDriverState
*bs_top
)
2108 bdrv_swap(bs_new
, bs_top
);
2110 /* The contents of 'tmp' will become bs_top, as we are
2111 * swapping bs_new and bs_top contents. */
2112 bdrv_set_backing_hd(bs_top
, bs_new
);
2115 static void bdrv_delete(BlockDriverState
*bs
)
2118 assert(bdrv_op_blocker_is_empty(bs
));
2119 assert(!bs
->refcnt
);
2120 assert(QLIST_EMPTY(&bs
->dirty_bitmaps
));
2124 /* remove from list, if necessary */
2131 * Run consistency checks on an image
2133 * Returns 0 if the check could be completed (it doesn't mean that the image is
2134 * free of errors) or -errno when an internal error occurred. The results of the
2135 * check are stored in res.
2137 int bdrv_check(BlockDriverState
*bs
, BdrvCheckResult
*res
, BdrvCheckMode fix
)
2139 if (bs
->drv
== NULL
) {
2142 if (bs
->drv
->bdrv_check
== NULL
) {
2146 memset(res
, 0, sizeof(*res
));
2147 return bs
->drv
->bdrv_check(bs
, res
, fix
);
2150 #define COMMIT_BUF_SECTORS 2048
2152 /* commit COW file into the raw image */
2153 int bdrv_commit(BlockDriverState
*bs
)
2155 BlockDriver
*drv
= bs
->drv
;
2156 int64_t sector
, total_sectors
, length
, backing_length
;
2157 int n
, ro
, open_flags
;
2159 uint8_t *buf
= NULL
;
2160 char filename
[PATH_MAX
];
2165 if (!bs
->backing_hd
) {
2169 if (bdrv_op_is_blocked(bs
, BLOCK_OP_TYPE_COMMIT
, NULL
) ||
2170 bdrv_op_is_blocked(bs
->backing_hd
, BLOCK_OP_TYPE_COMMIT
, NULL
)) {
2174 ro
= bs
->backing_hd
->read_only
;
2175 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2176 pstrcpy(filename
, sizeof(filename
), bs
->backing_hd
->filename
);
2177 open_flags
= bs
->backing_hd
->open_flags
;
2180 if (bdrv_reopen(bs
->backing_hd
, open_flags
| BDRV_O_RDWR
, NULL
)) {
2185 length
= bdrv_getlength(bs
);
2191 backing_length
= bdrv_getlength(bs
->backing_hd
);
2192 if (backing_length
< 0) {
2193 ret
= backing_length
;
2197 /* If our top snapshot is larger than the backing file image,
2198 * grow the backing file image if possible. If not possible,
2199 * we must return an error */
2200 if (length
> backing_length
) {
2201 ret
= bdrv_truncate(bs
->backing_hd
, length
);
2207 total_sectors
= length
>> BDRV_SECTOR_BITS
;
2209 /* qemu_try_blockalign() for bs will choose an alignment that works for
2210 * bs->backing_hd as well, so no need to compare the alignment manually. */
2211 buf
= qemu_try_blockalign(bs
, COMMIT_BUF_SECTORS
* BDRV_SECTOR_SIZE
);
2217 for (sector
= 0; sector
< total_sectors
; sector
+= n
) {
2218 ret
= bdrv_is_allocated(bs
, sector
, COMMIT_BUF_SECTORS
, &n
);
2223 ret
= bdrv_read(bs
, sector
, buf
, n
);
2228 ret
= bdrv_write(bs
->backing_hd
, sector
, buf
, n
);
2235 if (drv
->bdrv_make_empty
) {
2236 ret
= drv
->bdrv_make_empty(bs
);
2244 * Make sure all data we wrote to the backing device is actually
2247 if (bs
->backing_hd
) {
2248 bdrv_flush(bs
->backing_hd
);
2256 /* ignoring error return here */
2257 bdrv_reopen(bs
->backing_hd
, open_flags
& ~BDRV_O_RDWR
, NULL
);
2263 int bdrv_commit_all(void)
2265 BlockDriverState
*bs
;
2267 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
2268 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
2270 aio_context_acquire(aio_context
);
2271 if (bs
->drv
&& bs
->backing_hd
) {
2272 int ret
= bdrv_commit(bs
);
2274 aio_context_release(aio_context
);
2278 aio_context_release(aio_context
);
2284 * Remove an active request from the tracked requests list
2286 * This function should be called when a tracked request is completing.
2288 static void tracked_request_end(BdrvTrackedRequest
*req
)
2290 if (req
->serialising
) {
2291 req
->bs
->serialising_in_flight
--;
2294 QLIST_REMOVE(req
, list
);
2295 qemu_co_queue_restart_all(&req
->wait_queue
);
2299 * Add an active request to the tracked requests list
2301 static void tracked_request_begin(BdrvTrackedRequest
*req
,
2302 BlockDriverState
*bs
,
2304 unsigned int bytes
, bool is_write
)
2306 *req
= (BdrvTrackedRequest
){
2310 .is_write
= is_write
,
2311 .co
= qemu_coroutine_self(),
2312 .serialising
= false,
2313 .overlap_offset
= offset
,
2314 .overlap_bytes
= bytes
,
2317 qemu_co_queue_init(&req
->wait_queue
);
2319 QLIST_INSERT_HEAD(&bs
->tracked_requests
, req
, list
);
2322 static void mark_request_serialising(BdrvTrackedRequest
*req
, uint64_t align
)
2324 int64_t overlap_offset
= req
->offset
& ~(align
- 1);
2325 unsigned int overlap_bytes
= ROUND_UP(req
->offset
+ req
->bytes
, align
)
2328 if (!req
->serialising
) {
2329 req
->bs
->serialising_in_flight
++;
2330 req
->serialising
= true;
2333 req
->overlap_offset
= MIN(req
->overlap_offset
, overlap_offset
);
2334 req
->overlap_bytes
= MAX(req
->overlap_bytes
, overlap_bytes
);
2338 * Round a region to cluster boundaries
2340 void bdrv_round_to_clusters(BlockDriverState
*bs
,
2341 int64_t sector_num
, int nb_sectors
,
2342 int64_t *cluster_sector_num
,
2343 int *cluster_nb_sectors
)
2345 BlockDriverInfo bdi
;
2347 if (bdrv_get_info(bs
, &bdi
) < 0 || bdi
.cluster_size
== 0) {
2348 *cluster_sector_num
= sector_num
;
2349 *cluster_nb_sectors
= nb_sectors
;
2351 int64_t c
= bdi
.cluster_size
/ BDRV_SECTOR_SIZE
;
2352 *cluster_sector_num
= QEMU_ALIGN_DOWN(sector_num
, c
);
2353 *cluster_nb_sectors
= QEMU_ALIGN_UP(sector_num
- *cluster_sector_num
+
2358 static int bdrv_get_cluster_size(BlockDriverState
*bs
)
2360 BlockDriverInfo bdi
;
2363 ret
= bdrv_get_info(bs
, &bdi
);
2364 if (ret
< 0 || bdi
.cluster_size
== 0) {
2365 return bs
->request_alignment
;
2367 return bdi
.cluster_size
;
2371 static bool tracked_request_overlaps(BdrvTrackedRequest
*req
,
2372 int64_t offset
, unsigned int bytes
)
2375 if (offset
>= req
->overlap_offset
+ req
->overlap_bytes
) {
2379 if (req
->overlap_offset
>= offset
+ bytes
) {
2385 static bool coroutine_fn
wait_serialising_requests(BdrvTrackedRequest
*self
)
2387 BlockDriverState
*bs
= self
->bs
;
2388 BdrvTrackedRequest
*req
;
2390 bool waited
= false;
2392 if (!bs
->serialising_in_flight
) {
2398 QLIST_FOREACH(req
, &bs
->tracked_requests
, list
) {
2399 if (req
== self
|| (!req
->serialising
&& !self
->serialising
)) {
2402 if (tracked_request_overlaps(req
, self
->overlap_offset
,
2403 self
->overlap_bytes
))
2405 /* Hitting this means there was a reentrant request, for
2406 * example, a block driver issuing nested requests. This must
2407 * never happen since it means deadlock.
2409 assert(qemu_coroutine_self() != req
->co
);
2411 /* If the request is already (indirectly) waiting for us, or
2412 * will wait for us as soon as it wakes up, then just go on
2413 * (instead of producing a deadlock in the former case). */
2414 if (!req
->waiting_for
) {
2415 self
->waiting_for
= req
;
2416 qemu_co_queue_wait(&req
->wait_queue
);
2417 self
->waiting_for
= NULL
;
2432 * -EINVAL - backing format specified, but no file
2433 * -ENOSPC - can't update the backing file because no space is left in the
2435 * -ENOTSUP - format driver doesn't support changing the backing file
2437 int bdrv_change_backing_file(BlockDriverState
*bs
,
2438 const char *backing_file
, const char *backing_fmt
)
2440 BlockDriver
*drv
= bs
->drv
;
2443 /* Backing file format doesn't make sense without a backing file */
2444 if (backing_fmt
&& !backing_file
) {
2448 if (drv
->bdrv_change_backing_file
!= NULL
) {
2449 ret
= drv
->bdrv_change_backing_file(bs
, backing_file
, backing_fmt
);
2455 pstrcpy(bs
->backing_file
, sizeof(bs
->backing_file
), backing_file
?: "");
2456 pstrcpy(bs
->backing_format
, sizeof(bs
->backing_format
), backing_fmt
?: "");
2462 * Finds the image layer in the chain that has 'bs' as its backing file.
2464 * active is the current topmost image.
2466 * Returns NULL if bs is not found in active's image chain,
2467 * or if active == bs.
2469 * Returns the bottommost base image if bs == NULL.
2471 BlockDriverState
*bdrv_find_overlay(BlockDriverState
*active
,
2472 BlockDriverState
*bs
)
2474 while (active
&& bs
!= active
->backing_hd
) {
2475 active
= active
->backing_hd
;
2481 /* Given a BDS, searches for the base layer. */
2482 BlockDriverState
*bdrv_find_base(BlockDriverState
*bs
)
2484 return bdrv_find_overlay(bs
, NULL
);
/* One entry in the queue of intermediate images collected by
 * bdrv_drop_intermediate(): links a BlockDriverState into the
 * states_to_delete QSIMPLEQ of layers scheduled for removal. */
typedef struct BlkIntermediateStates {
    BlockDriverState *bs;
    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
} BlkIntermediateStates;
2494 * Drops images above 'base' up to and including 'top', and sets the image
2495 * above 'top' to have base as its backing file.
2497 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2498 * information in 'bs' can be properly updated.
2500 * E.g., this will convert the following chain:
2501 * bottom <- base <- intermediate <- top <- active
2505 * bottom <- base <- active
2507 * It is allowed for bottom==base, in which case it converts:
2509 * base <- intermediate <- top <- active
2515 * If backing_file_str is non-NULL, it will be used when modifying top's
2516 * overlay image metadata.
2519 * if active == top, that is considered an error
2522 int bdrv_drop_intermediate(BlockDriverState
*active
, BlockDriverState
*top
,
2523 BlockDriverState
*base
, const char *backing_file_str
)
2525 BlockDriverState
*intermediate
;
2526 BlockDriverState
*base_bs
= NULL
;
2527 BlockDriverState
*new_top_bs
= NULL
;
2528 BlkIntermediateStates
*intermediate_state
, *next
;
2531 QSIMPLEQ_HEAD(states_to_delete
, BlkIntermediateStates
) states_to_delete
;
2532 QSIMPLEQ_INIT(&states_to_delete
);
2534 if (!top
->drv
|| !base
->drv
) {
2538 new_top_bs
= bdrv_find_overlay(active
, top
);
2540 if (new_top_bs
== NULL
) {
2541 /* we could not find the image above 'top', this is an error */
2545 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2546 * to do, no intermediate images */
2547 if (new_top_bs
->backing_hd
== base
) {
2554 /* now we will go down through the list, and add each BDS we find
2555 * into our deletion queue, until we hit the 'base'
2557 while (intermediate
) {
2558 intermediate_state
= g_new0(BlkIntermediateStates
, 1);
2559 intermediate_state
->bs
= intermediate
;
2560 QSIMPLEQ_INSERT_TAIL(&states_to_delete
, intermediate_state
, entry
);
2562 if (intermediate
->backing_hd
== base
) {
2563 base_bs
= intermediate
->backing_hd
;
2566 intermediate
= intermediate
->backing_hd
;
2568 if (base_bs
== NULL
) {
2569 /* something went wrong, we did not end at the base. safely
2570 * unravel everything, and exit with error */
2574 /* success - we can delete the intermediate states, and link top->base */
2575 backing_file_str
= backing_file_str
? backing_file_str
: base_bs
->filename
;
2576 ret
= bdrv_change_backing_file(new_top_bs
, backing_file_str
,
2577 base_bs
->drv
? base_bs
->drv
->format_name
: "");
2581 bdrv_set_backing_hd(new_top_bs
, base_bs
);
2583 QSIMPLEQ_FOREACH_SAFE(intermediate_state
, &states_to_delete
, entry
, next
) {
2584 /* so that bdrv_close() does not recursively close the chain */
2585 bdrv_set_backing_hd(intermediate_state
->bs
, NULL
);
2586 bdrv_unref(intermediate_state
->bs
);
2591 QSIMPLEQ_FOREACH_SAFE(intermediate_state
, &states_to_delete
, entry
, next
) {
2592 g_free(intermediate_state
);
2598 static int bdrv_check_byte_request(BlockDriverState
*bs
, int64_t offset
,
2603 if (size
> INT_MAX
) {
2607 if (!bdrv_is_inserted(bs
))
2613 len
= bdrv_getlength(bs
);
2618 if ((offset
> len
) || (len
- offset
< size
))
2624 static int bdrv_check_request(BlockDriverState
*bs
, int64_t sector_num
,
2627 if (nb_sectors
< 0 || nb_sectors
> INT_MAX
/ BDRV_SECTOR_SIZE
) {
2631 return bdrv_check_byte_request(bs
, sector_num
* BDRV_SECTOR_SIZE
,
2632 nb_sectors
* BDRV_SECTOR_SIZE
);
2635 typedef struct RwCo
{
2636 BlockDriverState
*bs
;
2641 BdrvRequestFlags flags
;
2644 static void coroutine_fn
bdrv_rw_co_entry(void *opaque
)
2646 RwCo
*rwco
= opaque
;
2648 if (!rwco
->is_write
) {
2649 rwco
->ret
= bdrv_co_do_preadv(rwco
->bs
, rwco
->offset
,
2650 rwco
->qiov
->size
, rwco
->qiov
,
2653 rwco
->ret
= bdrv_co_do_pwritev(rwco
->bs
, rwco
->offset
,
2654 rwco
->qiov
->size
, rwco
->qiov
,
2660 * Process a vectored synchronous request using coroutines
2662 static int bdrv_prwv_co(BlockDriverState
*bs
, int64_t offset
,
2663 QEMUIOVector
*qiov
, bool is_write
,
2664 BdrvRequestFlags flags
)
2671 .is_write
= is_write
,
2677 * In sync call context, when the vcpu is blocked, this throttling timer
2678 * will not fire; so the I/O throttling function has to be disabled here
2679 * if it has been enabled.
2681 if (bs
->io_limits_enabled
) {
2682 fprintf(stderr
, "Disabling I/O throttling on '%s' due "
2683 "to synchronous I/O.\n", bdrv_get_device_name(bs
));
2684 bdrv_io_limits_disable(bs
);
2687 if (qemu_in_coroutine()) {
2688 /* Fast-path if already in coroutine context */
2689 bdrv_rw_co_entry(&rwco
);
2691 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
2693 co
= qemu_coroutine_create(bdrv_rw_co_entry
);
2694 qemu_coroutine_enter(co
, &rwco
);
2695 while (rwco
.ret
== NOT_DONE
) {
2696 aio_poll(aio_context
, true);
2703 * Process a synchronous request using coroutines
2705 static int bdrv_rw_co(BlockDriverState
*bs
, int64_t sector_num
, uint8_t *buf
,
2706 int nb_sectors
, bool is_write
, BdrvRequestFlags flags
)
2709 struct iovec iov
= {
2710 .iov_base
= (void *)buf
,
2711 .iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
,
2714 if (nb_sectors
< 0 || nb_sectors
> INT_MAX
/ BDRV_SECTOR_SIZE
) {
2718 qemu_iovec_init_external(&qiov
, &iov
, 1);
2719 return bdrv_prwv_co(bs
, sector_num
<< BDRV_SECTOR_BITS
,
2720 &qiov
, is_write
, flags
);
2723 /* return < 0 if error. See bdrv_write() for the return codes */
2724 int bdrv_read(BlockDriverState
*bs
, int64_t sector_num
,
2725 uint8_t *buf
, int nb_sectors
)
2727 return bdrv_rw_co(bs
, sector_num
, buf
, nb_sectors
, false, 0);
2730 /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2731 int bdrv_read_unthrottled(BlockDriverState
*bs
, int64_t sector_num
,
2732 uint8_t *buf
, int nb_sectors
)
2737 enabled
= bs
->io_limits_enabled
;
2738 bs
->io_limits_enabled
= false;
2739 ret
= bdrv_read(bs
, sector_num
, buf
, nb_sectors
);
2740 bs
->io_limits_enabled
= enabled
;
2744 /* Return < 0 if error. Important errors are:
2745 -EIO generic I/O error (may happen for all errors)
2746 -ENOMEDIUM No media inserted.
2747 -EINVAL Invalid sector number or nb_sectors
2748 -EACCES Trying to write a read-only device
2750 int bdrv_write(BlockDriverState
*bs
, int64_t sector_num
,
2751 const uint8_t *buf
, int nb_sectors
)
2753 return bdrv_rw_co(bs
, sector_num
, (uint8_t *)buf
, nb_sectors
, true, 0);
2756 int bdrv_write_zeroes(BlockDriverState
*bs
, int64_t sector_num
,
2757 int nb_sectors
, BdrvRequestFlags flags
)
2759 return bdrv_rw_co(bs
, sector_num
, NULL
, nb_sectors
, true,
2760 BDRV_REQ_ZERO_WRITE
| flags
);
2764 * Completely zero out a block device with the help of bdrv_write_zeroes.
2765 * The operation is sped up by checking the block status and only writing
2766 * zeroes to the device if they currently do not return zeroes. Optional
2767 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2769 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2771 int bdrv_make_zero(BlockDriverState
*bs
, BdrvRequestFlags flags
)
2773 int64_t target_sectors
, ret
, nb_sectors
, sector_num
= 0;
2776 target_sectors
= bdrv_nb_sectors(bs
);
2777 if (target_sectors
< 0) {
2778 return target_sectors
;
2782 nb_sectors
= target_sectors
- sector_num
;
2783 if (nb_sectors
<= 0) {
2786 if (nb_sectors
> INT_MAX
/ BDRV_SECTOR_SIZE
) {
2787 nb_sectors
= INT_MAX
/ BDRV_SECTOR_SIZE
;
2789 ret
= bdrv_get_block_status(bs
, sector_num
, nb_sectors
, &n
);
2791 error_report("error getting block status at sector %" PRId64
": %s",
2792 sector_num
, strerror(-ret
));
2795 if (ret
& BDRV_BLOCK_ZERO
) {
2799 ret
= bdrv_write_zeroes(bs
, sector_num
, n
, flags
);
2801 error_report("error writing zeroes at sector %" PRId64
": %s",
2802 sector_num
, strerror(-ret
));
2809 int bdrv_pread(BlockDriverState
*bs
, int64_t offset
, void *buf
, int bytes
)
2812 struct iovec iov
= {
2813 .iov_base
= (void *)buf
,
2822 qemu_iovec_init_external(&qiov
, &iov
, 1);
2823 ret
= bdrv_prwv_co(bs
, offset
, &qiov
, false, 0);
2831 int bdrv_pwritev(BlockDriverState
*bs
, int64_t offset
, QEMUIOVector
*qiov
)
2835 ret
= bdrv_prwv_co(bs
, offset
, qiov
, true, 0);
2843 int bdrv_pwrite(BlockDriverState
*bs
, int64_t offset
,
2844 const void *buf
, int bytes
)
2847 struct iovec iov
= {
2848 .iov_base
= (void *) buf
,
2856 qemu_iovec_init_external(&qiov
, &iov
, 1);
2857 return bdrv_pwritev(bs
, offset
, &qiov
);
2861 * Writes to the file and ensures that no writes are reordered across this
2862 * request (acts as a barrier)
2864 * Returns 0 on success, -errno in error cases.
2866 int bdrv_pwrite_sync(BlockDriverState
*bs
, int64_t offset
,
2867 const void *buf
, int count
)
2871 ret
= bdrv_pwrite(bs
, offset
, buf
, count
);
2876 /* No flush needed for cache modes that already do it */
2877 if (bs
->enable_write_cache
) {
2884 static int coroutine_fn
bdrv_co_do_copy_on_readv(BlockDriverState
*bs
,
2885 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
2887 /* Perform I/O through a temporary buffer so that users who scribble over
2888 * their read buffer while the operation is in progress do not end up
2889 * modifying the image file. This is critical for zero-copy guest I/O
2890 * where anything might happen inside guest memory.
2892 void *bounce_buffer
;
2894 BlockDriver
*drv
= bs
->drv
;
2896 QEMUIOVector bounce_qiov
;
2897 int64_t cluster_sector_num
;
2898 int cluster_nb_sectors
;
2902 /* Cover entire cluster so no additional backing file I/O is required when
2903 * allocating cluster in the image file.
2905 bdrv_round_to_clusters(bs
, sector_num
, nb_sectors
,
2906 &cluster_sector_num
, &cluster_nb_sectors
);
2908 trace_bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
,
2909 cluster_sector_num
, cluster_nb_sectors
);
2911 iov
.iov_len
= cluster_nb_sectors
* BDRV_SECTOR_SIZE
;
2912 iov
.iov_base
= bounce_buffer
= qemu_try_blockalign(bs
, iov
.iov_len
);
2913 if (bounce_buffer
== NULL
) {
2918 qemu_iovec_init_external(&bounce_qiov
, &iov
, 1);
2920 ret
= drv
->bdrv_co_readv(bs
, cluster_sector_num
, cluster_nb_sectors
,
2926 if (drv
->bdrv_co_write_zeroes
&&
2927 buffer_is_zero(bounce_buffer
, iov
.iov_len
)) {
2928 ret
= bdrv_co_do_write_zeroes(bs
, cluster_sector_num
,
2929 cluster_nb_sectors
, 0);
2931 /* This does not change the data on the disk, it is not necessary
2932 * to flush even in cache=writethrough mode.
2934 ret
= drv
->bdrv_co_writev(bs
, cluster_sector_num
, cluster_nb_sectors
,
2939 /* It might be okay to ignore write errors for guest requests. If this
2940 * is a deliberate copy-on-read then we don't want to ignore the error.
2941 * Simply report it in all cases.
2946 skip_bytes
= (sector_num
- cluster_sector_num
) * BDRV_SECTOR_SIZE
;
2947 qemu_iovec_from_buf(qiov
, 0, bounce_buffer
+ skip_bytes
,
2948 nb_sectors
* BDRV_SECTOR_SIZE
);
2951 qemu_vfree(bounce_buffer
);
2956 * Forwards an already correctly aligned request to the BlockDriver. This
2957 * handles copy on read and zeroing after EOF; any other features must be
2958 * implemented by the caller.
2960 static int coroutine_fn
bdrv_aligned_preadv(BlockDriverState
*bs
,
2961 BdrvTrackedRequest
*req
, int64_t offset
, unsigned int bytes
,
2962 int64_t align
, QEMUIOVector
*qiov
, int flags
)
2964 BlockDriver
*drv
= bs
->drv
;
2967 int64_t sector_num
= offset
>> BDRV_SECTOR_BITS
;
2968 unsigned int nb_sectors
= bytes
>> BDRV_SECTOR_BITS
;
2970 assert((offset
& (BDRV_SECTOR_SIZE
- 1)) == 0);
2971 assert((bytes
& (BDRV_SECTOR_SIZE
- 1)) == 0);
2972 assert(!qiov
|| bytes
== qiov
->size
);
2974 /* Handle Copy on Read and associated serialisation */
2975 if (flags
& BDRV_REQ_COPY_ON_READ
) {
2976 /* If we touch the same cluster it counts as an overlap. This
2977 * guarantees that allocating writes will be serialized and not race
2978 * with each other for the same cluster. For example, in copy-on-read
2979 * it ensures that the CoR read and write operations are atomic and
2980 * guest writes cannot interleave between them. */
2981 mark_request_serialising(req
, bdrv_get_cluster_size(bs
));
2984 wait_serialising_requests(req
);
2986 if (flags
& BDRV_REQ_COPY_ON_READ
) {
2989 ret
= bdrv_is_allocated(bs
, sector_num
, nb_sectors
, &pnum
);
2994 if (!ret
|| pnum
!= nb_sectors
) {
2995 ret
= bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
, qiov
);
3000 /* Forward the request to the BlockDriver */
3001 if (!(bs
->zero_beyond_eof
&& bs
->growable
)) {
3002 ret
= drv
->bdrv_co_readv(bs
, sector_num
, nb_sectors
, qiov
);
3004 /* Read zeros after EOF of growable BDSes */
3005 int64_t total_sectors
, max_nb_sectors
;
3007 total_sectors
= bdrv_nb_sectors(bs
);
3008 if (total_sectors
< 0) {
3009 ret
= total_sectors
;
3013 max_nb_sectors
= ROUND_UP(MAX(0, total_sectors
- sector_num
),
3014 align
>> BDRV_SECTOR_BITS
);
3015 if (max_nb_sectors
> 0) {
3016 QEMUIOVector local_qiov
;
3017 size_t local_sectors
;
3019 max_nb_sectors
= MIN(max_nb_sectors
, SIZE_MAX
/ BDRV_SECTOR_BITS
);
3020 local_sectors
= MIN(max_nb_sectors
, nb_sectors
);
3022 qemu_iovec_init(&local_qiov
, qiov
->niov
);
3023 qemu_iovec_concat(&local_qiov
, qiov
, 0,
3024 local_sectors
* BDRV_SECTOR_SIZE
);
3026 ret
= drv
->bdrv_co_readv(bs
, sector_num
, local_sectors
,
3029 qemu_iovec_destroy(&local_qiov
);
3034 /* Reading beyond end of file is supposed to produce zeroes */
3035 if (ret
== 0 && total_sectors
< sector_num
+ nb_sectors
) {
3036 uint64_t offset
= MAX(0, total_sectors
- sector_num
);
3037 uint64_t bytes
= (sector_num
+ nb_sectors
- offset
) *
3039 qemu_iovec_memset(qiov
, offset
* BDRV_SECTOR_SIZE
, 0, bytes
);
3048 * Handle a read request in coroutine context
3050 static int coroutine_fn
bdrv_co_do_preadv(BlockDriverState
*bs
,
3051 int64_t offset
, unsigned int bytes
, QEMUIOVector
*qiov
,
3052 BdrvRequestFlags flags
)
3054 BlockDriver
*drv
= bs
->drv
;
3055 BdrvTrackedRequest req
;
3057 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3058 uint64_t align
= MAX(BDRV_SECTOR_SIZE
, bs
->request_alignment
);
3059 uint8_t *head_buf
= NULL
;
3060 uint8_t *tail_buf
= NULL
;
3061 QEMUIOVector local_qiov
;
3062 bool use_local_qiov
= false;
3068 if (bdrv_check_byte_request(bs
, offset
, bytes
)) {
3072 if (bs
->copy_on_read
) {
3073 flags
|= BDRV_REQ_COPY_ON_READ
;
3076 /* throttling disk I/O */
3077 if (bs
->io_limits_enabled
) {
3078 bdrv_io_limits_intercept(bs
, bytes
, false);
3081 /* Align read if necessary by padding qiov */
3082 if (offset
& (align
- 1)) {
3083 head_buf
= qemu_blockalign(bs
, align
);
3084 qemu_iovec_init(&local_qiov
, qiov
->niov
+ 2);
3085 qemu_iovec_add(&local_qiov
, head_buf
, offset
& (align
- 1));
3086 qemu_iovec_concat(&local_qiov
, qiov
, 0, qiov
->size
);
3087 use_local_qiov
= true;
3089 bytes
+= offset
& (align
- 1);
3090 offset
= offset
& ~(align
- 1);
3093 if ((offset
+ bytes
) & (align
- 1)) {
3094 if (!use_local_qiov
) {
3095 qemu_iovec_init(&local_qiov
, qiov
->niov
+ 1);
3096 qemu_iovec_concat(&local_qiov
, qiov
, 0, qiov
->size
);
3097 use_local_qiov
= true;
3099 tail_buf
= qemu_blockalign(bs
, align
);
3100 qemu_iovec_add(&local_qiov
, tail_buf
,
3101 align
- ((offset
+ bytes
) & (align
- 1)));
3103 bytes
= ROUND_UP(bytes
, align
);
3106 tracked_request_begin(&req
, bs
, offset
, bytes
, false);
3107 ret
= bdrv_aligned_preadv(bs
, &req
, offset
, bytes
, align
,
3108 use_local_qiov
? &local_qiov
: qiov
,
3110 tracked_request_end(&req
);
3112 if (use_local_qiov
) {
3113 qemu_iovec_destroy(&local_qiov
);
3114 qemu_vfree(head_buf
);
3115 qemu_vfree(tail_buf
);
3121 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
3122 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
3123 BdrvRequestFlags flags
)
3125 if (nb_sectors
< 0 || nb_sectors
> (UINT_MAX
>> BDRV_SECTOR_BITS
)) {
3129 return bdrv_co_do_preadv(bs
, sector_num
<< BDRV_SECTOR_BITS
,
3130 nb_sectors
<< BDRV_SECTOR_BITS
, qiov
, flags
);
3133 int coroutine_fn
bdrv_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
3134 int nb_sectors
, QEMUIOVector
*qiov
)
3136 trace_bdrv_co_readv(bs
, sector_num
, nb_sectors
);
3138 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
, 0);
3141 int coroutine_fn
bdrv_co_copy_on_readv(BlockDriverState
*bs
,
3142 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
3144 trace_bdrv_co_copy_on_readv(bs
, sector_num
, nb_sectors
);
3146 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
,
3147 BDRV_REQ_COPY_ON_READ
);
3150 /* if no limit is specified in the BlockLimits use a default
3151 * of 32768 512-byte sectors (16 MiB) per request.
3153 #define MAX_WRITE_ZEROES_DEFAULT 32768
3155 static int coroutine_fn
bdrv_co_do_write_zeroes(BlockDriverState
*bs
,
3156 int64_t sector_num
, int nb_sectors
, BdrvRequestFlags flags
)
3158 BlockDriver
*drv
= bs
->drv
;
3160 struct iovec iov
= {0};
3163 int max_write_zeroes
= bs
->bl
.max_write_zeroes
?
3164 bs
->bl
.max_write_zeroes
: MAX_WRITE_ZEROES_DEFAULT
;
3166 while (nb_sectors
> 0 && !ret
) {
3167 int num
= nb_sectors
;
3169 /* Align request. Block drivers can expect the "bulk" of the request
3172 if (bs
->bl
.write_zeroes_alignment
3173 && num
> bs
->bl
.write_zeroes_alignment
) {
3174 if (sector_num
% bs
->bl
.write_zeroes_alignment
!= 0) {
3175 /* Make a small request up to the first aligned sector. */
3176 num
= bs
->bl
.write_zeroes_alignment
;
3177 num
-= sector_num
% bs
->bl
.write_zeroes_alignment
;
3178 } else if ((sector_num
+ num
) % bs
->bl
.write_zeroes_alignment
!= 0) {
3179 /* Shorten the request to the last aligned sector. num cannot
3180 * underflow because num > bs->bl.write_zeroes_alignment.
3182 num
-= (sector_num
+ num
) % bs
->bl
.write_zeroes_alignment
;
3186 /* limit request size */
3187 if (num
> max_write_zeroes
) {
3188 num
= max_write_zeroes
;
3192 /* First try the efficient write zeroes operation */
3193 if (drv
->bdrv_co_write_zeroes
) {
3194 ret
= drv
->bdrv_co_write_zeroes(bs
, sector_num
, num
, flags
);
3197 if (ret
== -ENOTSUP
) {
3198 /* Fall back to bounce buffer if write zeroes is unsupported */
3199 iov
.iov_len
= num
* BDRV_SECTOR_SIZE
;
3200 if (iov
.iov_base
== NULL
) {
3201 iov
.iov_base
= qemu_try_blockalign(bs
, num
* BDRV_SECTOR_SIZE
);
3202 if (iov
.iov_base
== NULL
) {
3206 memset(iov
.iov_base
, 0, num
* BDRV_SECTOR_SIZE
);
3208 qemu_iovec_init_external(&qiov
, &iov
, 1);
3210 ret
= drv
->bdrv_co_writev(bs
, sector_num
, num
, &qiov
);
3212 /* Keep bounce buffer around if it is big enough for all
3213 * all future requests.
3215 if (num
< max_write_zeroes
) {
3216 qemu_vfree(iov
.iov_base
);
3217 iov
.iov_base
= NULL
;
3226 qemu_vfree(iov
.iov_base
);
3231 * Forwards an already correctly aligned write request to the BlockDriver.
3233 static int coroutine_fn
bdrv_aligned_pwritev(BlockDriverState
*bs
,
3234 BdrvTrackedRequest
*req
, int64_t offset
, unsigned int bytes
,
3235 QEMUIOVector
*qiov
, int flags
)
3237 BlockDriver
*drv
= bs
->drv
;
3241 int64_t sector_num
= offset
>> BDRV_SECTOR_BITS
;
3242 unsigned int nb_sectors
= bytes
>> BDRV_SECTOR_BITS
;
3244 assert((offset
& (BDRV_SECTOR_SIZE
- 1)) == 0);
3245 assert((bytes
& (BDRV_SECTOR_SIZE
- 1)) == 0);
3246 assert(!qiov
|| bytes
== qiov
->size
);
3248 waited
= wait_serialising_requests(req
);
3249 assert(!waited
|| !req
->serialising
);
3250 assert(req
->overlap_offset
<= offset
);
3251 assert(offset
+ bytes
<= req
->overlap_offset
+ req
->overlap_bytes
);
3253 ret
= notifier_with_return_list_notify(&bs
->before_write_notifiers
, req
);
3255 if (!ret
&& bs
->detect_zeroes
!= BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF
&&
3256 !(flags
& BDRV_REQ_ZERO_WRITE
) && drv
->bdrv_co_write_zeroes
&&
3257 qemu_iovec_is_zero(qiov
)) {
3258 flags
|= BDRV_REQ_ZERO_WRITE
;
3259 if (bs
->detect_zeroes
== BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP
) {
3260 flags
|= BDRV_REQ_MAY_UNMAP
;
3265 /* Do nothing, write notifier decided to fail this request */
3266 } else if (flags
& BDRV_REQ_ZERO_WRITE
) {
3267 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_ZERO
);
3268 ret
= bdrv_co_do_write_zeroes(bs
, sector_num
, nb_sectors
, flags
);
3270 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV
);
3271 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, qiov
);
3273 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_DONE
);
3275 if (ret
== 0 && !bs
->enable_write_cache
) {
3276 ret
= bdrv_co_flush(bs
);
3279 bdrv_set_dirty(bs
, sector_num
, nb_sectors
);
3281 block_acct_highest_sector(&bs
->stats
, sector_num
, nb_sectors
);
3283 if (bs
->growable
&& ret
>= 0) {
3284 bs
->total_sectors
= MAX(bs
->total_sectors
, sector_num
+ nb_sectors
);
3291 * Handle a write request in coroutine context
3293 static int coroutine_fn
bdrv_co_do_pwritev(BlockDriverState
*bs
,
3294 int64_t offset
, unsigned int bytes
, QEMUIOVector
*qiov
,
3295 BdrvRequestFlags flags
)
3297 BdrvTrackedRequest req
;
3298 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3299 uint64_t align
= MAX(BDRV_SECTOR_SIZE
, bs
->request_alignment
);
3300 uint8_t *head_buf
= NULL
;
3301 uint8_t *tail_buf
= NULL
;
3302 QEMUIOVector local_qiov
;
3303 bool use_local_qiov
= false;
3309 if (bs
->read_only
) {
3312 if (bdrv_check_byte_request(bs
, offset
, bytes
)) {
3316 /* throttling disk I/O */
3317 if (bs
->io_limits_enabled
) {
3318 bdrv_io_limits_intercept(bs
, bytes
, true);
3322 * Align write if necessary by performing a read-modify-write cycle.
3323 * Pad qiov with the read parts and be sure to have a tracked request not
3324 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3326 tracked_request_begin(&req
, bs
, offset
, bytes
, true);
3328 if (offset
& (align
- 1)) {
3329 QEMUIOVector head_qiov
;
3330 struct iovec head_iov
;
3332 mark_request_serialising(&req
, align
);
3333 wait_serialising_requests(&req
);
3335 head_buf
= qemu_blockalign(bs
, align
);
3336 head_iov
= (struct iovec
) {
3337 .iov_base
= head_buf
,
3340 qemu_iovec_init_external(&head_qiov
, &head_iov
, 1);
3342 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_RMW_HEAD
);
3343 ret
= bdrv_aligned_preadv(bs
, &req
, offset
& ~(align
- 1), align
,
3344 align
, &head_qiov
, 0);
3348 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_RMW_AFTER_HEAD
);
3350 qemu_iovec_init(&local_qiov
, qiov
->niov
+ 2);
3351 qemu_iovec_add(&local_qiov
, head_buf
, offset
& (align
- 1));
3352 qemu_iovec_concat(&local_qiov
, qiov
, 0, qiov
->size
);
3353 use_local_qiov
= true;
3355 bytes
+= offset
& (align
- 1);
3356 offset
= offset
& ~(align
- 1);
3359 if ((offset
+ bytes
) & (align
- 1)) {
3360 QEMUIOVector tail_qiov
;
3361 struct iovec tail_iov
;
3365 mark_request_serialising(&req
, align
);
3366 waited
= wait_serialising_requests(&req
);
3367 assert(!waited
|| !use_local_qiov
);
3369 tail_buf
= qemu_blockalign(bs
, align
);
3370 tail_iov
= (struct iovec
) {
3371 .iov_base
= tail_buf
,
3374 qemu_iovec_init_external(&tail_qiov
, &tail_iov
, 1);
3376 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_RMW_TAIL
);
3377 ret
= bdrv_aligned_preadv(bs
, &req
, (offset
+ bytes
) & ~(align
- 1), align
,
3378 align
, &tail_qiov
, 0);
3382 BLKDBG_EVENT(bs
, BLKDBG_PWRITEV_RMW_AFTER_TAIL
);
3384 if (!use_local_qiov
) {
3385 qemu_iovec_init(&local_qiov
, qiov
->niov
+ 1);
3386 qemu_iovec_concat(&local_qiov
, qiov
, 0, qiov
->size
);
3387 use_local_qiov
= true;
3390 tail_bytes
= (offset
+ bytes
) & (align
- 1);
3391 qemu_iovec_add(&local_qiov
, tail_buf
+ tail_bytes
, align
- tail_bytes
);
3393 bytes
= ROUND_UP(bytes
, align
);
3396 ret
= bdrv_aligned_pwritev(bs
, &req
, offset
, bytes
,
3397 use_local_qiov
? &local_qiov
: qiov
,
3401 tracked_request_end(&req
);
3403 if (use_local_qiov
) {
3404 qemu_iovec_destroy(&local_qiov
);
3406 qemu_vfree(head_buf
);
3407 qemu_vfree(tail_buf
);
3412 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
3413 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
3414 BdrvRequestFlags flags
)
3416 if (nb_sectors
< 0 || nb_sectors
> (INT_MAX
>> BDRV_SECTOR_BITS
)) {
3420 return bdrv_co_do_pwritev(bs
, sector_num
<< BDRV_SECTOR_BITS
,
3421 nb_sectors
<< BDRV_SECTOR_BITS
, qiov
, flags
);
3424 int coroutine_fn
bdrv_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
3425 int nb_sectors
, QEMUIOVector
*qiov
)
3427 trace_bdrv_co_writev(bs
, sector_num
, nb_sectors
);
3429 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, qiov
, 0);
3432 int coroutine_fn
bdrv_co_write_zeroes(BlockDriverState
*bs
,
3433 int64_t sector_num
, int nb_sectors
,
3434 BdrvRequestFlags flags
)
3436 trace_bdrv_co_write_zeroes(bs
, sector_num
, nb_sectors
, flags
);
3438 if (!(bs
->open_flags
& BDRV_O_UNMAP
)) {
3439 flags
&= ~BDRV_REQ_MAY_UNMAP
;
3442 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, NULL
,
3443 BDRV_REQ_ZERO_WRITE
| flags
);
3447 * Truncate file to 'offset' bytes (needed only for file protocols)
3449 int bdrv_truncate(BlockDriverState
*bs
, int64_t offset
)
3451 BlockDriver
*drv
= bs
->drv
;
3455 if (!drv
->bdrv_truncate
)
3460 ret
= drv
->bdrv_truncate(bs
, offset
);
3462 ret
= refresh_total_sectors(bs
, offset
>> BDRV_SECTOR_BITS
);
3464 blk_dev_resize_cb(bs
->blk
);
3471 * Length of a allocated file in bytes. Sparse files are counted by actual
3472 * allocated space. Return < 0 if error or unknown.
3474 int64_t bdrv_get_allocated_file_size(BlockDriverState
*bs
)
3476 BlockDriver
*drv
= bs
->drv
;
3480 if (drv
->bdrv_get_allocated_file_size
) {
3481 return drv
->bdrv_get_allocated_file_size(bs
);
3484 return bdrv_get_allocated_file_size(bs
->file
);
3490 * Return number of sectors on success, -errno on error.
3492 int64_t bdrv_nb_sectors(BlockDriverState
*bs
)
3494 BlockDriver
*drv
= bs
->drv
;
3499 if (drv
->has_variable_length
) {
3500 int ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
3505 return bs
->total_sectors
;
3509 * Return length in bytes on success, -errno on error.
3510 * The length is always a multiple of BDRV_SECTOR_SIZE.
3512 int64_t bdrv_getlength(BlockDriverState
*bs
)
3514 int64_t ret
= bdrv_nb_sectors(bs
);
3516 return ret
< 0 ? ret
: ret
* BDRV_SECTOR_SIZE
;
3519 /* return 0 as number of sectors if no device present or error */
3520 void bdrv_get_geometry(BlockDriverState
*bs
, uint64_t *nb_sectors_ptr
)
3522 int64_t nb_sectors
= bdrv_nb_sectors(bs
);
3524 *nb_sectors_ptr
= nb_sectors
< 0 ? 0 : nb_sectors
;
3527 void bdrv_set_on_error(BlockDriverState
*bs
, BlockdevOnError on_read_error
,
3528 BlockdevOnError on_write_error
)
3530 bs
->on_read_error
= on_read_error
;
3531 bs
->on_write_error
= on_write_error
;
3534 BlockdevOnError
bdrv_get_on_error(BlockDriverState
*bs
, bool is_read
)
3536 return is_read
? bs
->on_read_error
: bs
->on_write_error
;
3539 BlockErrorAction
bdrv_get_error_action(BlockDriverState
*bs
, bool is_read
, int error
)
3541 BlockdevOnError on_err
= is_read
? bs
->on_read_error
: bs
->on_write_error
;
3544 case BLOCKDEV_ON_ERROR_ENOSPC
:
3545 return (error
== ENOSPC
) ?
3546 BLOCK_ERROR_ACTION_STOP
: BLOCK_ERROR_ACTION_REPORT
;
3547 case BLOCKDEV_ON_ERROR_STOP
:
3548 return BLOCK_ERROR_ACTION_STOP
;
3549 case BLOCKDEV_ON_ERROR_REPORT
:
3550 return BLOCK_ERROR_ACTION_REPORT
;
3551 case BLOCKDEV_ON_ERROR_IGNORE
:
3552 return BLOCK_ERROR_ACTION_IGNORE
;
3558 static void send_qmp_error_event(BlockDriverState
*bs
,
3559 BlockErrorAction action
,
3560 bool is_read
, int error
)
3562 IoOperationType optype
;
3564 optype
= is_read
? IO_OPERATION_TYPE_READ
: IO_OPERATION_TYPE_WRITE
;
3565 qapi_event_send_block_io_error(bdrv_get_device_name(bs
), optype
, action
,
3566 bdrv_iostatus_is_enabled(bs
),
3567 error
== ENOSPC
, strerror(error
),
3571 /* This is done by device models because, while the block layer knows
3572 * about the error, it does not know whether an operation comes from
3573 * the device or the block layer (from a job, for example).
3575 void bdrv_error_action(BlockDriverState
*bs
, BlockErrorAction action
,
3576 bool is_read
, int error
)
3580 if (action
== BLOCK_ERROR_ACTION_STOP
) {
3581 /* First set the iostatus, so that "info block" returns an iostatus
3582 * that matches the events raised so far (an additional error iostatus
3583 * is fine, but not a lost one).
3585 bdrv_iostatus_set_err(bs
, error
);
3587 /* Then raise the request to stop the VM and the event.
3588 * qemu_system_vmstop_request_prepare has two effects. First,
3589 * it ensures that the STOP event always comes after the
3590 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3591 * can observe the STOP event and do a "cont" before the STOP
3592 * event is issued, the VM will not stop. In this case, vm_start()
3593 * also ensures that the STOP/RESUME pair of events is emitted.
3595 qemu_system_vmstop_request_prepare();
3596 send_qmp_error_event(bs
, action
, is_read
, error
);
3597 qemu_system_vmstop_request(RUN_STATE_IO_ERROR
);
3599 send_qmp_error_event(bs
, action
, is_read
, error
);
3603 int bdrv_is_read_only(BlockDriverState
*bs
)
3605 return bs
->read_only
;
3608 int bdrv_is_sg(BlockDriverState
*bs
)
3613 int bdrv_enable_write_cache(BlockDriverState
*bs
)
3615 return bs
->enable_write_cache
;
3618 void bdrv_set_enable_write_cache(BlockDriverState
*bs
, bool wce
)
3620 bs
->enable_write_cache
= wce
;
3622 /* so a reopen() will preserve wce */
3624 bs
->open_flags
|= BDRV_O_CACHE_WB
;
3626 bs
->open_flags
&= ~BDRV_O_CACHE_WB
;
3630 int bdrv_is_encrypted(BlockDriverState
*bs
)
3632 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
3634 return bs
->encrypted
;
3637 int bdrv_key_required(BlockDriverState
*bs
)
3639 BlockDriverState
*backing_hd
= bs
->backing_hd
;
3641 if (backing_hd
&& backing_hd
->encrypted
&& !backing_hd
->valid_key
)
3643 return (bs
->encrypted
&& !bs
->valid_key
);
3646 int bdrv_set_key(BlockDriverState
*bs
, const char *key
)
3649 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
) {
3650 ret
= bdrv_set_key(bs
->backing_hd
, key
);
3656 if (!bs
->encrypted
) {
3658 } else if (!bs
->drv
|| !bs
->drv
->bdrv_set_key
) {
3661 ret
= bs
->drv
->bdrv_set_key(bs
, key
);
3664 } else if (!bs
->valid_key
) {
3667 /* call the change callback now, we skipped it on open */
3668 blk_dev_change_media_cb(bs
->blk
, true);
3674 const char *bdrv_get_format_name(BlockDriverState
*bs
)
3676 return bs
->drv
? bs
->drv
->format_name
: NULL
;
/* qsort() comparator for an array of 'const char *'.
 *
 * Fix: qsort passes pointers to the ELEMENTS, i.e. 'const char **' here.
 * The previous code called strcmp(a, b) on those element pointers
 * themselves, comparing unrelated pointer objects as strings (undefined
 * behavior, wrong sort order).  Dereference before comparing. */
static int qsort_strcmp(const void *a, const void *b)
{
    return strcmp(*(const char *const *)a, *(const char *const *)b);
}
3684 void bdrv_iterate_format(void (*it
)(void *opaque
, const char *name
),
3690 const char **formats
= NULL
;
3692 QLIST_FOREACH(drv
, &bdrv_drivers
, list
) {
3693 if (drv
->format_name
) {
3696 while (formats
&& i
&& !found
) {
3697 found
= !strcmp(formats
[--i
], drv
->format_name
);
3701 formats
= g_renew(const char *, formats
, count
+ 1);
3702 formats
[count
++] = drv
->format_name
;
3707 qsort(formats
, count
, sizeof(formats
[0]), qsort_strcmp
);
3709 for (i
= 0; i
< count
; i
++) {
3710 it(opaque
, formats
[i
]);
3716 /* This function is to find block backend bs */
3717 /* TODO convert callers to blk_by_name(), then remove */
3718 BlockDriverState
*bdrv_find(const char *name
)
3720 BlockBackend
*blk
= blk_by_name(name
);
3722 return blk
? blk_bs(blk
) : NULL
;
3725 /* This function is to find a node in the bs graph */
3726 BlockDriverState
*bdrv_find_node(const char *node_name
)
3728 BlockDriverState
*bs
;
3732 QTAILQ_FOREACH(bs
, &graph_bdrv_states
, node_list
) {
3733 if (!strcmp(node_name
, bs
->node_name
)) {
3740 /* Put this QMP function here so it can access the static graph_bdrv_states. */
3741 BlockDeviceInfoList
*bdrv_named_nodes_list(void)
3743 BlockDeviceInfoList
*list
, *entry
;
3744 BlockDriverState
*bs
;
3747 QTAILQ_FOREACH(bs
, &graph_bdrv_states
, node_list
) {
3748 entry
= g_malloc0(sizeof(*entry
));
3749 entry
->value
= bdrv_block_device_info(bs
);
3757 BlockDriverState
*bdrv_lookup_bs(const char *device
,
3758 const char *node_name
,
3762 BlockDriverState
*bs
;
3765 blk
= blk_by_name(device
);
3773 bs
= bdrv_find_node(node_name
);
3780 error_setg(errp
, "Cannot find device=%s nor node_name=%s",
3781 device
? device
: "",
3782 node_name
? node_name
: "");
3786 /* If 'base' is in the same chain as 'top', return true. Otherwise,
3787 * return false. If either argument is NULL, return false. */
3788 bool bdrv_chain_contains(BlockDriverState
*top
, BlockDriverState
*base
)
3790 while (top
&& top
!= base
) {
3791 top
= top
->backing_hd
;
3797 BlockDriverState
*bdrv_next(BlockDriverState
*bs
)
3800 return QTAILQ_FIRST(&bdrv_states
);
3802 return QTAILQ_NEXT(bs
, device_list
);
3805 /* TODO check what callers really want: bs->node_name or blk_name() */
3806 const char *bdrv_get_device_name(const BlockDriverState
*bs
)
3808 return bs
->blk
? blk_name(bs
->blk
) : "";
3811 int bdrv_get_flags(BlockDriverState
*bs
)
3813 return bs
->open_flags
;
3816 int bdrv_flush_all(void)
3818 BlockDriverState
*bs
;
3821 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
3822 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
3825 aio_context_acquire(aio_context
);
3826 ret
= bdrv_flush(bs
);
3827 if (ret
< 0 && !result
) {
3830 aio_context_release(aio_context
);
3836 int bdrv_has_zero_init_1(BlockDriverState
*bs
)
3841 int bdrv_has_zero_init(BlockDriverState
*bs
)
3845 /* If BS is a copy on write image, it is initialized to
3846 the contents of the base image, which may not be zeroes. */
3847 if (bs
->backing_hd
) {
3850 if (bs
->drv
->bdrv_has_zero_init
) {
3851 return bs
->drv
->bdrv_has_zero_init(bs
);
3858 bool bdrv_unallocated_blocks_are_zero(BlockDriverState
*bs
)
3860 BlockDriverInfo bdi
;
3862 if (bs
->backing_hd
) {
3866 if (bdrv_get_info(bs
, &bdi
) == 0) {
3867 return bdi
.unallocated_blocks_are_zero
;
3873 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState
*bs
)
3875 BlockDriverInfo bdi
;
3877 if (bs
->backing_hd
|| !(bs
->open_flags
& BDRV_O_UNMAP
)) {
3881 if (bdrv_get_info(bs
, &bdi
) == 0) {
3882 return bdi
.can_write_zeroes_with_unmap
;
3888 typedef struct BdrvCoGetBlockStatusData
{
3889 BlockDriverState
*bs
;
3890 BlockDriverState
*base
;
3896 } BdrvCoGetBlockStatusData
;
3899 * Returns the allocation status of the specified sectors.
3900 * Drivers not implementing the functionality are assumed to not support
3901 * backing files, hence all their sectors are reported as allocated.
3903 * If 'sector_num' is beyond the end of the disk image the return value is 0
3904 * and 'pnum' is set to 0.
3906 * 'pnum' is set to the number of sectors (including and immediately following
3907 * the specified sector) that are known to be in the same
3908 * allocated/unallocated state.
3910 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
3911 * beyond the end of the disk image it will be clamped.
3913 static int64_t coroutine_fn
bdrv_co_get_block_status(BlockDriverState
*bs
,
3915 int nb_sectors
, int *pnum
)
3917 int64_t total_sectors
;
3921 total_sectors
= bdrv_nb_sectors(bs
);
3922 if (total_sectors
< 0) {
3923 return total_sectors
;
3926 if (sector_num
>= total_sectors
) {
3931 n
= total_sectors
- sector_num
;
3932 if (n
< nb_sectors
) {
3936 if (!bs
->drv
->bdrv_co_get_block_status
) {
3938 ret
= BDRV_BLOCK_DATA
| BDRV_BLOCK_ALLOCATED
;
3939 if (bs
->drv
->protocol_name
) {
3940 ret
|= BDRV_BLOCK_OFFSET_VALID
| (sector_num
* BDRV_SECTOR_SIZE
);
3945 ret
= bs
->drv
->bdrv_co_get_block_status(bs
, sector_num
, nb_sectors
, pnum
);
3951 if (ret
& BDRV_BLOCK_RAW
) {
3952 assert(ret
& BDRV_BLOCK_OFFSET_VALID
);
3953 return bdrv_get_block_status(bs
->file
, ret
>> BDRV_SECTOR_BITS
,
3957 if (ret
& (BDRV_BLOCK_DATA
| BDRV_BLOCK_ZERO
)) {
3958 ret
|= BDRV_BLOCK_ALLOCATED
;
3961 if (!(ret
& BDRV_BLOCK_DATA
) && !(ret
& BDRV_BLOCK_ZERO
)) {
3962 if (bdrv_unallocated_blocks_are_zero(bs
)) {
3963 ret
|= BDRV_BLOCK_ZERO
;
3964 } else if (bs
->backing_hd
) {
3965 BlockDriverState
*bs2
= bs
->backing_hd
;
3966 int64_t nb_sectors2
= bdrv_nb_sectors(bs2
);
3967 if (nb_sectors2
>= 0 && sector_num
>= nb_sectors2
) {
3968 ret
|= BDRV_BLOCK_ZERO
;
3974 (ret
& BDRV_BLOCK_DATA
) && !(ret
& BDRV_BLOCK_ZERO
) &&
3975 (ret
& BDRV_BLOCK_OFFSET_VALID
)) {
3978 ret2
= bdrv_co_get_block_status(bs
->file
, ret
>> BDRV_SECTOR_BITS
,
3981 /* Ignore errors. This is just providing extra information, it
3982 * is useful but not necessary.
3985 /* !file_pnum indicates an offset at or beyond the EOF; it is
3986 * perfectly valid for the format block driver to point to such
3987 * offsets, so catch it and mark everything as zero */
3988 ret
|= BDRV_BLOCK_ZERO
;
3990 /* Limit request to the range reported by the protocol driver */
3992 ret
|= (ret2
& BDRV_BLOCK_ZERO
);
4000 /* Coroutine wrapper for bdrv_get_block_status() */
4001 static void coroutine_fn
bdrv_get_block_status_co_entry(void *opaque
)
4003 BdrvCoGetBlockStatusData
*data
= opaque
;
4004 BlockDriverState
*bs
= data
->bs
;
4006 data
->ret
= bdrv_co_get_block_status(bs
, data
->sector_num
, data
->nb_sectors
,
4012 * Synchronous wrapper around bdrv_co_get_block_status().
4014 * See bdrv_co_get_block_status() for details.
4016 int64_t bdrv_get_block_status(BlockDriverState
*bs
, int64_t sector_num
,
4017 int nb_sectors
, int *pnum
)
4020 BdrvCoGetBlockStatusData data
= {
4022 .sector_num
= sector_num
,
4023 .nb_sectors
= nb_sectors
,
4028 if (qemu_in_coroutine()) {
4029 /* Fast-path if already in coroutine context */
4030 bdrv_get_block_status_co_entry(&data
);
4032 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
4034 co
= qemu_coroutine_create(bdrv_get_block_status_co_entry
);
4035 qemu_coroutine_enter(co
, &data
);
4036 while (!data
.done
) {
4037 aio_poll(aio_context
, true);
4043 int coroutine_fn
bdrv_is_allocated(BlockDriverState
*bs
, int64_t sector_num
,
4044 int nb_sectors
, int *pnum
)
4046 int64_t ret
= bdrv_get_block_status(bs
, sector_num
, nb_sectors
, pnum
);
4050 return !!(ret
& BDRV_BLOCK_ALLOCATED
);
4054 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4056 * Return true if the given sector is allocated in any image between
4057 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4058 * sector is allocated in any image of the chain. Return false otherwise.
4060 * 'pnum' is set to the number of sectors (including and immediately following
4061 * the specified sector) that are known to be in the same
4062 * allocated/unallocated state.
4065 int bdrv_is_allocated_above(BlockDriverState
*top
,
4066 BlockDriverState
*base
,
4068 int nb_sectors
, int *pnum
)
4070 BlockDriverState
*intermediate
;
4071 int ret
, n
= nb_sectors
;
4074 while (intermediate
&& intermediate
!= base
) {
4076 ret
= bdrv_is_allocated(intermediate
, sector_num
, nb_sectors
,
4086 * [sector_num, nb_sectors] is unallocated on top but intermediate
4089 * [sector_num+x, nr_sectors] allocated.
4091 if (n
> pnum_inter
&&
4092 (intermediate
== top
||
4093 sector_num
+ pnum_inter
< intermediate
->total_sectors
)) {
4097 intermediate
= intermediate
->backing_hd
;
4104 const char *bdrv_get_encrypted_filename(BlockDriverState
*bs
)
4106 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
4107 return bs
->backing_file
;
4108 else if (bs
->encrypted
)
4109 return bs
->filename
;
4114 void bdrv_get_backing_filename(BlockDriverState
*bs
,
4115 char *filename
, int filename_size
)
4117 pstrcpy(filename
, filename_size
, bs
->backing_file
);
4120 int bdrv_write_compressed(BlockDriverState
*bs
, int64_t sector_num
,
4121 const uint8_t *buf
, int nb_sectors
)
4123 BlockDriver
*drv
= bs
->drv
;
4126 if (!drv
->bdrv_write_compressed
)
4128 if (bdrv_check_request(bs
, sector_num
, nb_sectors
))
4131 assert(QLIST_EMPTY(&bs
->dirty_bitmaps
));
4133 return drv
->bdrv_write_compressed(bs
, sector_num
, buf
, nb_sectors
);
4136 int bdrv_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
4138 BlockDriver
*drv
= bs
->drv
;
4141 if (!drv
->bdrv_get_info
)
4143 memset(bdi
, 0, sizeof(*bdi
));
4144 return drv
->bdrv_get_info(bs
, bdi
);
4147 ImageInfoSpecific
*bdrv_get_specific_info(BlockDriverState
*bs
)
4149 BlockDriver
*drv
= bs
->drv
;
4150 if (drv
&& drv
->bdrv_get_specific_info
) {
4151 return drv
->bdrv_get_specific_info(bs
);
4156 int bdrv_save_vmstate(BlockDriverState
*bs
, const uint8_t *buf
,
4157 int64_t pos
, int size
)
4160 struct iovec iov
= {
4161 .iov_base
= (void *) buf
,
4165 qemu_iovec_init_external(&qiov
, &iov
, 1);
4166 return bdrv_writev_vmstate(bs
, &qiov
, pos
);
4169 int bdrv_writev_vmstate(BlockDriverState
*bs
, QEMUIOVector
*qiov
, int64_t pos
)
4171 BlockDriver
*drv
= bs
->drv
;
4175 } else if (drv
->bdrv_save_vmstate
) {
4176 return drv
->bdrv_save_vmstate(bs
, qiov
, pos
);
4177 } else if (bs
->file
) {
4178 return bdrv_writev_vmstate(bs
->file
, qiov
, pos
);
4184 int bdrv_load_vmstate(BlockDriverState
*bs
, uint8_t *buf
,
4185 int64_t pos
, int size
)
4187 BlockDriver
*drv
= bs
->drv
;
4190 if (drv
->bdrv_load_vmstate
)
4191 return drv
->bdrv_load_vmstate(bs
, buf
, pos
, size
);
4193 return bdrv_load_vmstate(bs
->file
, buf
, pos
, size
);
4197 void bdrv_debug_event(BlockDriverState
*bs
, BlkDebugEvent event
)
4199 if (!bs
|| !bs
->drv
|| !bs
->drv
->bdrv_debug_event
) {
4203 bs
->drv
->bdrv_debug_event(bs
, event
);
4206 int bdrv_debug_breakpoint(BlockDriverState
*bs
, const char *event
,
4209 while (bs
&& bs
->drv
&& !bs
->drv
->bdrv_debug_breakpoint
) {
4213 if (bs
&& bs
->drv
&& bs
->drv
->bdrv_debug_breakpoint
) {
4214 return bs
->drv
->bdrv_debug_breakpoint(bs
, event
, tag
);
4220 int bdrv_debug_remove_breakpoint(BlockDriverState
*bs
, const char *tag
)
4222 while (bs
&& bs
->drv
&& !bs
->drv
->bdrv_debug_remove_breakpoint
) {
4226 if (bs
&& bs
->drv
&& bs
->drv
->bdrv_debug_remove_breakpoint
) {
4227 return bs
->drv
->bdrv_debug_remove_breakpoint(bs
, tag
);
4233 int bdrv_debug_resume(BlockDriverState
*bs
, const char *tag
)
4235 while (bs
&& (!bs
->drv
|| !bs
->drv
->bdrv_debug_resume
)) {
4239 if (bs
&& bs
->drv
&& bs
->drv
->bdrv_debug_resume
) {
4240 return bs
->drv
->bdrv_debug_resume(bs
, tag
);
4246 bool bdrv_debug_is_suspended(BlockDriverState
*bs
, const char *tag
)
4248 while (bs
&& bs
->drv
&& !bs
->drv
->bdrv_debug_is_suspended
) {
4252 if (bs
&& bs
->drv
&& bs
->drv
->bdrv_debug_is_suspended
) {
4253 return bs
->drv
->bdrv_debug_is_suspended(bs
, tag
);
4259 int bdrv_is_snapshot(BlockDriverState
*bs
)
4261 return !!(bs
->open_flags
& BDRV_O_SNAPSHOT
);
4264 /* backing_file can either be relative, or absolute, or a protocol. If it is
4265 * relative, it must be relative to the chain. So, passing in bs->filename
4266 * from a BDS as backing_file should not be done, as that may be relative to
4267 * the CWD rather than the chain. */
4268 BlockDriverState
*bdrv_find_backing_image(BlockDriverState
*bs
,
4269 const char *backing_file
)
4271 char *filename_full
= NULL
;
4272 char *backing_file_full
= NULL
;
4273 char *filename_tmp
= NULL
;
4274 int is_protocol
= 0;
4275 BlockDriverState
*curr_bs
= NULL
;
4276 BlockDriverState
*retval
= NULL
;
4278 if (!bs
|| !bs
->drv
|| !backing_file
) {
4282 filename_full
= g_malloc(PATH_MAX
);
4283 backing_file_full
= g_malloc(PATH_MAX
);
4284 filename_tmp
= g_malloc(PATH_MAX
);
4286 is_protocol
= path_has_protocol(backing_file
);
4288 for (curr_bs
= bs
; curr_bs
->backing_hd
; curr_bs
= curr_bs
->backing_hd
) {
4290 /* If either of the filename paths is actually a protocol, then
4291 * compare unmodified paths; otherwise make paths relative */
4292 if (is_protocol
|| path_has_protocol(curr_bs
->backing_file
)) {
4293 if (strcmp(backing_file
, curr_bs
->backing_file
) == 0) {
4294 retval
= curr_bs
->backing_hd
;
4298 /* If not an absolute filename path, make it relative to the current
4299 * image's filename path */
4300 path_combine(filename_tmp
, PATH_MAX
, curr_bs
->filename
,
4303 /* We are going to compare absolute pathnames */
4304 if (!realpath(filename_tmp
, filename_full
)) {
4308 /* We need to make sure the backing filename we are comparing against
4309 * is relative to the current image filename (or absolute) */
4310 path_combine(filename_tmp
, PATH_MAX
, curr_bs
->filename
,
4311 curr_bs
->backing_file
);
4313 if (!realpath(filename_tmp
, backing_file_full
)) {
4317 if (strcmp(backing_file_full
, filename_full
) == 0) {
4318 retval
= curr_bs
->backing_hd
;
4324 g_free(filename_full
);
4325 g_free(backing_file_full
);
4326 g_free(filename_tmp
);
4330 int bdrv_get_backing_file_depth(BlockDriverState
*bs
)
4336 if (!bs
->backing_hd
) {
4340 return 1 + bdrv_get_backing_file_depth(bs
->backing_hd
);
4343 /**************************************************************/
4346 BlockAIOCB
*bdrv_aio_readv(BlockDriverState
*bs
, int64_t sector_num
,
4347 QEMUIOVector
*qiov
, int nb_sectors
,
4348 BlockCompletionFunc
*cb
, void *opaque
)
4350 trace_bdrv_aio_readv(bs
, sector_num
, nb_sectors
, opaque
);
4352 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, 0,
4356 BlockAIOCB
*bdrv_aio_writev(BlockDriverState
*bs
, int64_t sector_num
,
4357 QEMUIOVector
*qiov
, int nb_sectors
,
4358 BlockCompletionFunc
*cb
, void *opaque
)
4360 trace_bdrv_aio_writev(bs
, sector_num
, nb_sectors
, opaque
);
4362 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, 0,
4366 BlockAIOCB
*bdrv_aio_write_zeroes(BlockDriverState
*bs
,
4367 int64_t sector_num
, int nb_sectors
, BdrvRequestFlags flags
,
4368 BlockCompletionFunc
*cb
, void *opaque
)
4370 trace_bdrv_aio_write_zeroes(bs
, sector_num
, nb_sectors
, flags
, opaque
);
4372 return bdrv_co_aio_rw_vector(bs
, sector_num
, NULL
, nb_sectors
,
4373 BDRV_REQ_ZERO_WRITE
| flags
,
4378 typedef struct MultiwriteCB
{
4383 BlockCompletionFunc
*cb
;
4385 QEMUIOVector
*free_qiov
;
4389 static void multiwrite_user_cb(MultiwriteCB
*mcb
)
4393 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
4394 mcb
->callbacks
[i
].cb(mcb
->callbacks
[i
].opaque
, mcb
->error
);
4395 if (mcb
->callbacks
[i
].free_qiov
) {
4396 qemu_iovec_destroy(mcb
->callbacks
[i
].free_qiov
);
4398 g_free(mcb
->callbacks
[i
].free_qiov
);
4402 static void multiwrite_cb(void *opaque
, int ret
)
4404 MultiwriteCB
*mcb
= opaque
;
4406 trace_multiwrite_cb(mcb
, ret
);
4408 if (ret
< 0 && !mcb
->error
) {
4412 mcb
->num_requests
--;
4413 if (mcb
->num_requests
== 0) {
4414 multiwrite_user_cb(mcb
);
4419 static int multiwrite_req_compare(const void *a
, const void *b
)
4421 const BlockRequest
*req1
= a
, *req2
= b
;
4424 * Note that we can't simply subtract req2->sector from req1->sector
4425 * here as that could overflow the return value.
4427 if (req1
->sector
> req2
->sector
) {
4429 } else if (req1
->sector
< req2
->sector
) {
4437 * Takes a bunch of requests and tries to merge them. Returns the number of
4438 * requests that remain after merging.
4440 static int multiwrite_merge(BlockDriverState
*bs
, BlockRequest
*reqs
,
4441 int num_reqs
, MultiwriteCB
*mcb
)
4445 // Sort requests by start sector
4446 qsort(reqs
, num_reqs
, sizeof(*reqs
), &multiwrite_req_compare
);
4448 // Check if adjacent requests touch the same clusters. If so, combine them,
4449 // filling up gaps with zero sectors.
4451 for (i
= 1; i
< num_reqs
; i
++) {
4453 int64_t oldreq_last
= reqs
[outidx
].sector
+ reqs
[outidx
].nb_sectors
;
4455 // Handle exactly sequential writes and overlapping writes.
4456 if (reqs
[i
].sector
<= oldreq_last
) {
4460 if (reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1 > IOV_MAX
) {
4464 if (bs
->bl
.max_transfer_length
&& reqs
[outidx
].nb_sectors
+
4465 reqs
[i
].nb_sectors
> bs
->bl
.max_transfer_length
) {
4471 QEMUIOVector
*qiov
= g_malloc0(sizeof(*qiov
));
4472 qemu_iovec_init(qiov
,
4473 reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1);
4475 // Add the first request to the merged one. If the requests are
4476 // overlapping, drop the last sectors of the first request.
4477 size
= (reqs
[i
].sector
- reqs
[outidx
].sector
) << 9;
4478 qemu_iovec_concat(qiov
, reqs
[outidx
].qiov
, 0, size
);
4480 // We should need to add any zeros between the two requests
4481 assert (reqs
[i
].sector
<= oldreq_last
);
4483 // Add the second request
4484 qemu_iovec_concat(qiov
, reqs
[i
].qiov
, 0, reqs
[i
].qiov
->size
);
4486 // Add tail of first request, if necessary
4487 if (qiov
->size
< reqs
[outidx
].qiov
->size
) {
4488 qemu_iovec_concat(qiov
, reqs
[outidx
].qiov
, qiov
->size
,
4489 reqs
[outidx
].qiov
->size
- qiov
->size
);
4492 reqs
[outidx
].nb_sectors
= qiov
->size
>> 9;
4493 reqs
[outidx
].qiov
= qiov
;
4495 mcb
->callbacks
[i
].free_qiov
= reqs
[outidx
].qiov
;
4498 reqs
[outidx
].sector
= reqs
[i
].sector
;
4499 reqs
[outidx
].nb_sectors
= reqs
[i
].nb_sectors
;
4500 reqs
[outidx
].qiov
= reqs
[i
].qiov
;
4508 * Submit multiple AIO write requests at once.
4510 * On success, the function returns 0 and all requests in the reqs array have
4511 * been submitted. In error case this function returns -1, and any of the
4512 * requests may or may not be submitted yet. In particular, this means that the
4513 * callback will be called for some of the requests, for others it won't. The
4514 * caller must check the error field of the BlockRequest to wait for the right
4515 * callbacks (if error != 0, no callback will be called).
4517 * The implementation may modify the contents of the reqs array, e.g. to merge
4518 * requests. However, the fields opaque and error are left unmodified as they
4519 * are used to signal failure for a single request to the caller.
4521 int bdrv_aio_multiwrite(BlockDriverState
*bs
, BlockRequest
*reqs
, int num_reqs
)
4526 /* don't submit writes if we don't have a medium */
4527 if (bs
->drv
== NULL
) {
4528 for (i
= 0; i
< num_reqs
; i
++) {
4529 reqs
[i
].error
= -ENOMEDIUM
;
4534 if (num_reqs
== 0) {
4538 // Create MultiwriteCB structure
4539 mcb
= g_malloc0(sizeof(*mcb
) + num_reqs
* sizeof(*mcb
->callbacks
));
4540 mcb
->num_requests
= 0;
4541 mcb
->num_callbacks
= num_reqs
;
4543 for (i
= 0; i
< num_reqs
; i
++) {
4544 mcb
->callbacks
[i
].cb
= reqs
[i
].cb
;
4545 mcb
->callbacks
[i
].opaque
= reqs
[i
].opaque
;
4548 // Check for mergable requests
4549 num_reqs
= multiwrite_merge(bs
, reqs
, num_reqs
, mcb
);
4551 trace_bdrv_aio_multiwrite(mcb
, mcb
->num_callbacks
, num_reqs
);
4553 /* Run the aio requests. */
4554 mcb
->num_requests
= num_reqs
;
4555 for (i
= 0; i
< num_reqs
; i
++) {
4556 bdrv_co_aio_rw_vector(bs
, reqs
[i
].sector
, reqs
[i
].qiov
,
4557 reqs
[i
].nb_sectors
, reqs
[i
].flags
,
4565 void bdrv_aio_cancel(BlockAIOCB
*acb
)
4568 bdrv_aio_cancel_async(acb
);
4569 while (acb
->refcnt
> 1) {
4570 if (acb
->aiocb_info
->get_aio_context
) {
4571 aio_poll(acb
->aiocb_info
->get_aio_context(acb
), true);
4572 } else if (acb
->bs
) {
4573 aio_poll(bdrv_get_aio_context(acb
->bs
), true);
4578 qemu_aio_unref(acb
);
4581 /* Async version of aio cancel. The caller is not blocked if the acb implements
4582 * cancel_async, otherwise we do nothing and let the request normally complete.
4583 * In either case the completion callback must be called. */
4584 void bdrv_aio_cancel_async(BlockAIOCB
*acb
)
4586 if (acb
->aiocb_info
->cancel_async
) {
4587 acb
->aiocb_info
->cancel_async(acb
);
4591 /**************************************************************/
4592 /* async block device emulation */
4594 typedef struct BlockAIOCBSync
{
4598 /* vector translation state */
4604 static const AIOCBInfo bdrv_em_aiocb_info
= {
4605 .aiocb_size
= sizeof(BlockAIOCBSync
),
4608 static void bdrv_aio_bh_cb(void *opaque
)
4610 BlockAIOCBSync
*acb
= opaque
;
4612 if (!acb
->is_write
&& acb
->ret
>= 0) {
4613 qemu_iovec_from_buf(acb
->qiov
, 0, acb
->bounce
, acb
->qiov
->size
);
4615 qemu_vfree(acb
->bounce
);
4616 acb
->common
.cb(acb
->common
.opaque
, acb
->ret
);
4617 qemu_bh_delete(acb
->bh
);
4619 qemu_aio_unref(acb
);
4622 static BlockAIOCB
*bdrv_aio_rw_vector(BlockDriverState
*bs
,
4626 BlockCompletionFunc
*cb
,
4631 BlockAIOCBSync
*acb
;
4633 acb
= qemu_aio_get(&bdrv_em_aiocb_info
, bs
, cb
, opaque
);
4634 acb
->is_write
= is_write
;
4636 acb
->bounce
= qemu_try_blockalign(bs
, qiov
->size
);
4637 acb
->bh
= aio_bh_new(bdrv_get_aio_context(bs
), bdrv_aio_bh_cb
, acb
);
4639 if (acb
->bounce
== NULL
) {
4641 } else if (is_write
) {
4642 qemu_iovec_to_buf(acb
->qiov
, 0, acb
->bounce
, qiov
->size
);
4643 acb
->ret
= bs
->drv
->bdrv_write(bs
, sector_num
, acb
->bounce
, nb_sectors
);
4645 acb
->ret
= bs
->drv
->bdrv_read(bs
, sector_num
, acb
->bounce
, nb_sectors
);
4648 qemu_bh_schedule(acb
->bh
);
4650 return &acb
->common
;
4653 static BlockAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
4654 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
4655 BlockCompletionFunc
*cb
, void *opaque
)
4657 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 0);
4660 static BlockAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
4661 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
4662 BlockCompletionFunc
*cb
, void *opaque
)
4664 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 1);
4668 typedef struct BlockAIOCBCoroutine
{
4674 } BlockAIOCBCoroutine
;
4676 static const AIOCBInfo bdrv_em_co_aiocb_info
= {
4677 .aiocb_size
= sizeof(BlockAIOCBCoroutine
),
4680 static void bdrv_co_em_bh(void *opaque
)
4682 BlockAIOCBCoroutine
*acb
= opaque
;
4684 acb
->common
.cb(acb
->common
.opaque
, acb
->req
.error
);
4686 qemu_bh_delete(acb
->bh
);
4687 qemu_aio_unref(acb
);
4690 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4691 static void coroutine_fn
bdrv_co_do_rw(void *opaque
)
4693 BlockAIOCBCoroutine
*acb
= opaque
;
4694 BlockDriverState
*bs
= acb
->common
.bs
;
4696 if (!acb
->is_write
) {
4697 acb
->req
.error
= bdrv_co_do_readv(bs
, acb
->req
.sector
,
4698 acb
->req
.nb_sectors
, acb
->req
.qiov
, acb
->req
.flags
);
4700 acb
->req
.error
= bdrv_co_do_writev(bs
, acb
->req
.sector
,
4701 acb
->req
.nb_sectors
, acb
->req
.qiov
, acb
->req
.flags
);
4704 acb
->bh
= aio_bh_new(bdrv_get_aio_context(bs
), bdrv_co_em_bh
, acb
);
4705 qemu_bh_schedule(acb
->bh
);
4708 static BlockAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
4712 BdrvRequestFlags flags
,
4713 BlockCompletionFunc
*cb
,
4718 BlockAIOCBCoroutine
*acb
;
4720 acb
= qemu_aio_get(&bdrv_em_co_aiocb_info
, bs
, cb
, opaque
);
4721 acb
->req
.sector
= sector_num
;
4722 acb
->req
.nb_sectors
= nb_sectors
;
4723 acb
->req
.qiov
= qiov
;
4724 acb
->req
.flags
= flags
;
4725 acb
->is_write
= is_write
;
4727 co
= qemu_coroutine_create(bdrv_co_do_rw
);
4728 qemu_coroutine_enter(co
, acb
);
4730 return &acb
->common
;
4733 static void coroutine_fn
bdrv_aio_flush_co_entry(void *opaque
)
4735 BlockAIOCBCoroutine
*acb
= opaque
;
4736 BlockDriverState
*bs
= acb
->common
.bs
;
4738 acb
->req
.error
= bdrv_co_flush(bs
);
4739 acb
->bh
= aio_bh_new(bdrv_get_aio_context(bs
), bdrv_co_em_bh
, acb
);
4740 qemu_bh_schedule(acb
->bh
);
4743 BlockAIOCB
*bdrv_aio_flush(BlockDriverState
*bs
,
4744 BlockCompletionFunc
*cb
, void *opaque
)
4746 trace_bdrv_aio_flush(bs
, opaque
);
4749 BlockAIOCBCoroutine
*acb
;
4751 acb
= qemu_aio_get(&bdrv_em_co_aiocb_info
, bs
, cb
, opaque
);
4753 co
= qemu_coroutine_create(bdrv_aio_flush_co_entry
);
4754 qemu_coroutine_enter(co
, acb
);
4756 return &acb
->common
;
4759 static void coroutine_fn
bdrv_aio_discard_co_entry(void *opaque
)
4761 BlockAIOCBCoroutine
*acb
= opaque
;
4762 BlockDriverState
*bs
= acb
->common
.bs
;
4764 acb
->req
.error
= bdrv_co_discard(bs
, acb
->req
.sector
, acb
->req
.nb_sectors
);
4765 acb
->bh
= aio_bh_new(bdrv_get_aio_context(bs
), bdrv_co_em_bh
, acb
);
4766 qemu_bh_schedule(acb
->bh
);
4769 BlockAIOCB
*bdrv_aio_discard(BlockDriverState
*bs
,
4770 int64_t sector_num
, int nb_sectors
,
4771 BlockCompletionFunc
*cb
, void *opaque
)
4774 BlockAIOCBCoroutine
*acb
;
4776 trace_bdrv_aio_discard(bs
, sector_num
, nb_sectors
, opaque
);
4778 acb
= qemu_aio_get(&bdrv_em_co_aiocb_info
, bs
, cb
, opaque
);
4779 acb
->req
.sector
= sector_num
;
4780 acb
->req
.nb_sectors
= nb_sectors
;
4781 co
= qemu_coroutine_create(bdrv_aio_discard_co_entry
);
4782 qemu_coroutine_enter(co
, acb
);
4784 return &acb
->common
;
4787 void bdrv_init(void)
4789 module_call_init(MODULE_INIT_BLOCK
);
4792 void bdrv_init_with_whitelist(void)
4794 use_bdrv_whitelist
= 1;
4798 void *qemu_aio_get(const AIOCBInfo
*aiocb_info
, BlockDriverState
*bs
,
4799 BlockCompletionFunc
*cb
, void *opaque
)
4803 acb
= g_slice_alloc(aiocb_info
->aiocb_size
);
4804 acb
->aiocb_info
= aiocb_info
;
4807 acb
->opaque
= opaque
;
4812 void qemu_aio_ref(void *p
)
4814 BlockAIOCB
*acb
= p
;
4818 void qemu_aio_unref(void *p
)
4820 BlockAIOCB
*acb
= p
;
4821 assert(acb
->refcnt
> 0);
4822 if (--acb
->refcnt
== 0) {
4823 g_slice_free1(acb
->aiocb_info
->aiocb_size
, acb
);
4827 /**************************************************************/
4828 /* Coroutine block device emulation */
4830 typedef struct CoroutineIOCompletion
{
4831 Coroutine
*coroutine
;
4833 } CoroutineIOCompletion
;
4835 static void bdrv_co_io_em_complete(void *opaque
, int ret
)
4837 CoroutineIOCompletion
*co
= opaque
;
4840 qemu_coroutine_enter(co
->coroutine
, NULL
);
4843 static int coroutine_fn
bdrv_co_io_em(BlockDriverState
*bs
, int64_t sector_num
,
4844 int nb_sectors
, QEMUIOVector
*iov
,
4847 CoroutineIOCompletion co
= {
4848 .coroutine
= qemu_coroutine_self(),
4853 acb
= bs
->drv
->bdrv_aio_writev(bs
, sector_num
, iov
, nb_sectors
,
4854 bdrv_co_io_em_complete
, &co
);
4856 acb
= bs
->drv
->bdrv_aio_readv(bs
, sector_num
, iov
, nb_sectors
,
4857 bdrv_co_io_em_complete
, &co
);
4860 trace_bdrv_co_io_em(bs
, sector_num
, nb_sectors
, is_write
, acb
);
4864 qemu_coroutine_yield();
4869 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
4870 int64_t sector_num
, int nb_sectors
,
4873 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, false);
4876 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
4877 int64_t sector_num
, int nb_sectors
,
4880 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, true);
4883 static void coroutine_fn
bdrv_flush_co_entry(void *opaque
)
4885 RwCo
*rwco
= opaque
;
4887 rwco
->ret
= bdrv_co_flush(rwco
->bs
);
4890 int coroutine_fn
bdrv_co_flush(BlockDriverState
*bs
)
4894 if (!bs
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
4898 /* Write back cached data to the OS even with cache=unsafe */
4899 BLKDBG_EVENT(bs
->file
, BLKDBG_FLUSH_TO_OS
);
4900 if (bs
->drv
->bdrv_co_flush_to_os
) {
4901 ret
= bs
->drv
->bdrv_co_flush_to_os(bs
);
4907 /* But don't actually force it to the disk with cache=unsafe */
4908 if (bs
->open_flags
& BDRV_O_NO_FLUSH
) {
4912 BLKDBG_EVENT(bs
->file
, BLKDBG_FLUSH_TO_DISK
);
4913 if (bs
->drv
->bdrv_co_flush_to_disk
) {
4914 ret
= bs
->drv
->bdrv_co_flush_to_disk(bs
);
4915 } else if (bs
->drv
->bdrv_aio_flush
) {
4917 CoroutineIOCompletion co
= {
4918 .coroutine
= qemu_coroutine_self(),
4921 acb
= bs
->drv
->bdrv_aio_flush(bs
, bdrv_co_io_em_complete
, &co
);
4925 qemu_coroutine_yield();
4930 * Some block drivers always operate in either writethrough or unsafe
4931 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
4932 * know how the server works (because the behaviour is hardcoded or
4933 * depends on server-side configuration), so we can't ensure that
4934 * everything is safe on disk. Returning an error doesn't work because
4935 * that would break guests even if the server operates in writethrough
4938 * Let's hope the user knows what he's doing.
4946 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
4947 * in the case of cache=unsafe, so there are no useless flushes.
4950 return bdrv_co_flush(bs
->file
);
4953 void bdrv_invalidate_cache(BlockDriverState
*bs
, Error
**errp
)
4955 Error
*local_err
= NULL
;
4962 if (!(bs
->open_flags
& BDRV_O_INCOMING
)) {
4965 bs
->open_flags
&= ~BDRV_O_INCOMING
;
4967 if (bs
->drv
->bdrv_invalidate_cache
) {
4968 bs
->drv
->bdrv_invalidate_cache(bs
, &local_err
);
4969 } else if (bs
->file
) {
4970 bdrv_invalidate_cache(bs
->file
, &local_err
);
4973 error_propagate(errp
, local_err
);
4977 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
4979 error_setg_errno(errp
, -ret
, "Could not refresh total sector count");
4984 void bdrv_invalidate_cache_all(Error
**errp
)
4986 BlockDriverState
*bs
;
4987 Error
*local_err
= NULL
;
4989 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
4990 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
4992 aio_context_acquire(aio_context
);
4993 bdrv_invalidate_cache(bs
, &local_err
);
4994 aio_context_release(aio_context
);
4996 error_propagate(errp
, local_err
);
5002 int bdrv_flush(BlockDriverState
*bs
)
5010 if (qemu_in_coroutine()) {
5011 /* Fast-path if already in coroutine context */
5012 bdrv_flush_co_entry(&rwco
);
5014 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
5016 co
= qemu_coroutine_create(bdrv_flush_co_entry
);
5017 qemu_coroutine_enter(co
, &rwco
);
5018 while (rwco
.ret
== NOT_DONE
) {
5019 aio_poll(aio_context
, true);
5026 typedef struct DiscardCo
{
5027 BlockDriverState
*bs
;
5032 static void coroutine_fn
bdrv_discard_co_entry(void *opaque
)
5034 DiscardCo
*rwco
= opaque
;
5036 rwco
->ret
= bdrv_co_discard(rwco
->bs
, rwco
->sector_num
, rwco
->nb_sectors
);
5039 /* if no limit is specified in the BlockLimits use a default
5040 * of 32768 512-byte sectors (16 MiB) per request.
5042 #define MAX_DISCARD_DEFAULT 32768
5044 int coroutine_fn
bdrv_co_discard(BlockDriverState
*bs
, int64_t sector_num
,
5051 } else if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
5053 } else if (bs
->read_only
) {
5057 bdrv_reset_dirty(bs
, sector_num
, nb_sectors
);
5059 /* Do nothing if disabled. */
5060 if (!(bs
->open_flags
& BDRV_O_UNMAP
)) {
5064 if (!bs
->drv
->bdrv_co_discard
&& !bs
->drv
->bdrv_aio_discard
) {
5068 max_discard
= bs
->bl
.max_discard
? bs
->bl
.max_discard
: MAX_DISCARD_DEFAULT
;
5069 while (nb_sectors
> 0) {
5071 int num
= nb_sectors
;
5074 if (bs
->bl
.discard_alignment
&&
5075 num
>= bs
->bl
.discard_alignment
&&
5076 sector_num
% bs
->bl
.discard_alignment
) {
5077 if (num
> bs
->bl
.discard_alignment
) {
5078 num
= bs
->bl
.discard_alignment
;
5080 num
-= sector_num
% bs
->bl
.discard_alignment
;
5083 /* limit request size */
5084 if (num
> max_discard
) {
5088 if (bs
->drv
->bdrv_co_discard
) {
5089 ret
= bs
->drv
->bdrv_co_discard(bs
, sector_num
, num
);
5092 CoroutineIOCompletion co
= {
5093 .coroutine
= qemu_coroutine_self(),
5096 acb
= bs
->drv
->bdrv_aio_discard(bs
, sector_num
, nb_sectors
,
5097 bdrv_co_io_em_complete
, &co
);
5101 qemu_coroutine_yield();
5105 if (ret
&& ret
!= -ENOTSUP
) {
5115 int bdrv_discard(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
)
5120 .sector_num
= sector_num
,
5121 .nb_sectors
= nb_sectors
,
5125 if (qemu_in_coroutine()) {
5126 /* Fast-path if already in coroutine context */
5127 bdrv_discard_co_entry(&rwco
);
5129 AioContext
*aio_context
= bdrv_get_aio_context(bs
);
5131 co
= qemu_coroutine_create(bdrv_discard_co_entry
);
5132 qemu_coroutine_enter(co
, &rwco
);
5133 while (rwco
.ret
== NOT_DONE
) {
5134 aio_poll(aio_context
, true);
5141 /**************************************************************/
5142 /* removable device support */
5145 * Return TRUE if the media is present
5147 int bdrv_is_inserted(BlockDriverState
*bs
)
5149 BlockDriver
*drv
= bs
->drv
;
5153 if (!drv
->bdrv_is_inserted
)
5155 return drv
->bdrv_is_inserted(bs
);
5159 * Return whether the media changed since the last call to this
5160 * function, or -ENOTSUP if we don't know. Most drivers don't know.
5162 int bdrv_media_changed(BlockDriverState
*bs
)
5164 BlockDriver
*drv
= bs
->drv
;
5166 if (drv
&& drv
->bdrv_media_changed
) {
5167 return drv
->bdrv_media_changed(bs
);
5173 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5175 void bdrv_eject(BlockDriverState
*bs
, bool eject_flag
)
5177 BlockDriver
*drv
= bs
->drv
;
5178 const char *device_name
;
5180 if (drv
&& drv
->bdrv_eject
) {
5181 drv
->bdrv_eject(bs
, eject_flag
);
5184 device_name
= bdrv_get_device_name(bs
);
5185 if (device_name
[0] != '\0') {
5186 qapi_event_send_device_tray_moved(device_name
,
5187 eject_flag
, &error_abort
);
5192 * Lock or unlock the media (if it is locked, the user won't be able
5193 * to eject it manually).
5195 void bdrv_lock_medium(BlockDriverState
*bs
, bool locked
)
5197 BlockDriver
*drv
= bs
->drv
;
5199 trace_bdrv_lock_medium(bs
, locked
);
5201 if (drv
&& drv
->bdrv_lock_medium
) {
5202 drv
->bdrv_lock_medium(bs
, locked
);
5206 /* needed for generic scsi interface */
5208 int bdrv_ioctl(BlockDriverState
*bs
, unsigned long int req
, void *buf
)
5210 BlockDriver
*drv
= bs
->drv
;
5212 if (drv
&& drv
->bdrv_ioctl
)
5213 return drv
->bdrv_ioctl(bs
, req
, buf
);
5217 BlockAIOCB
*bdrv_aio_ioctl(BlockDriverState
*bs
,
5218 unsigned long int req
, void *buf
,
5219 BlockCompletionFunc
*cb
, void *opaque
)
5221 BlockDriver
*drv
= bs
->drv
;
5223 if (drv
&& drv
->bdrv_aio_ioctl
)
5224 return drv
->bdrv_aio_ioctl(bs
, req
, buf
, cb
, opaque
);
5228 void bdrv_set_guest_block_size(BlockDriverState
*bs
, int align
)
5230 bs
->guest_block_size
= align
;
5233 void *qemu_blockalign(BlockDriverState
*bs
, size_t size
)
5235 return qemu_memalign(bdrv_opt_mem_align(bs
), size
);
5238 void *qemu_blockalign0(BlockDriverState
*bs
, size_t size
)
5240 return memset(qemu_blockalign(bs
, size
), 0, size
);
5243 void *qemu_try_blockalign(BlockDriverState
*bs
, size_t size
)
5245 size_t align
= bdrv_opt_mem_align(bs
);
5247 /* Ensure that NULL is never returned on success */
5253 return qemu_try_memalign(align
, size
);
5256 void *qemu_try_blockalign0(BlockDriverState
*bs
, size_t size
)
5258 void *mem
= qemu_try_blockalign(bs
, size
);
5261 memset(mem
, 0, size
);
5268 * Check if all memory in this vector is sector aligned.
5270 bool bdrv_qiov_is_aligned(BlockDriverState
*bs
, QEMUIOVector
*qiov
)
5273 size_t alignment
= bdrv_opt_mem_align(bs
);
5275 for (i
= 0; i
< qiov
->niov
; i
++) {
5276 if ((uintptr_t) qiov
->iov
[i
].iov_base
% alignment
) {
5279 if (qiov
->iov
[i
].iov_len
% alignment
) {
5287 BdrvDirtyBitmap
*bdrv_create_dirty_bitmap(BlockDriverState
*bs
, int granularity
,
5290 int64_t bitmap_size
;
5291 BdrvDirtyBitmap
*bitmap
;
5293 assert((granularity
& (granularity
- 1)) == 0);
5295 granularity
>>= BDRV_SECTOR_BITS
;
5296 assert(granularity
);
5297 bitmap_size
= bdrv_nb_sectors(bs
);
5298 if (bitmap_size
< 0) {
5299 error_setg_errno(errp
, -bitmap_size
, "could not get length of device");
5300 errno
= -bitmap_size
;
5303 bitmap
= g_new0(BdrvDirtyBitmap
, 1);
5304 bitmap
->bitmap
= hbitmap_alloc(bitmap_size
, ffs(granularity
) - 1);
5305 QLIST_INSERT_HEAD(&bs
->dirty_bitmaps
, bitmap
, list
);
5309 void bdrv_release_dirty_bitmap(BlockDriverState
*bs
, BdrvDirtyBitmap
*bitmap
)
5311 BdrvDirtyBitmap
*bm
, *next
;
5312 QLIST_FOREACH_SAFE(bm
, &bs
->dirty_bitmaps
, list
, next
) {
5314 QLIST_REMOVE(bitmap
, list
);
5315 hbitmap_free(bitmap
->bitmap
);
5322 BlockDirtyInfoList
*bdrv_query_dirty_bitmaps(BlockDriverState
*bs
)
5324 BdrvDirtyBitmap
*bm
;
5325 BlockDirtyInfoList
*list
= NULL
;
5326 BlockDirtyInfoList
**plist
= &list
;
5328 QLIST_FOREACH(bm
, &bs
->dirty_bitmaps
, list
) {
5329 BlockDirtyInfo
*info
= g_new0(BlockDirtyInfo
, 1);
5330 BlockDirtyInfoList
*entry
= g_new0(BlockDirtyInfoList
, 1);
5331 info
->count
= bdrv_get_dirty_count(bs
, bm
);
5333 ((int64_t) BDRV_SECTOR_SIZE
<< hbitmap_granularity(bm
->bitmap
));
5334 entry
->value
= info
;
5336 plist
= &entry
->next
;
5342 int bdrv_get_dirty(BlockDriverState
*bs
, BdrvDirtyBitmap
*bitmap
, int64_t sector
)
5345 return hbitmap_get(bitmap
->bitmap
, sector
);
5351 void bdrv_dirty_iter_init(BlockDriverState
*bs
,
5352 BdrvDirtyBitmap
*bitmap
, HBitmapIter
*hbi
)
5354 hbitmap_iter_init(hbi
, bitmap
->bitmap
, 0);
5357 void bdrv_set_dirty(BlockDriverState
*bs
, int64_t cur_sector
,
5360 BdrvDirtyBitmap
*bitmap
;
5361 QLIST_FOREACH(bitmap
, &bs
->dirty_bitmaps
, list
) {
5362 hbitmap_set(bitmap
->bitmap
, cur_sector
, nr_sectors
);
5366 void bdrv_reset_dirty(BlockDriverState
*bs
, int64_t cur_sector
, int nr_sectors
)
5368 BdrvDirtyBitmap
*bitmap
;
5369 QLIST_FOREACH(bitmap
, &bs
->dirty_bitmaps
, list
) {
5370 hbitmap_reset(bitmap
->bitmap
, cur_sector
, nr_sectors
);
5374 int64_t bdrv_get_dirty_count(BlockDriverState
*bs
, BdrvDirtyBitmap
*bitmap
)
5376 return hbitmap_count(bitmap
->bitmap
);
5379 /* Get a reference to bs */
5380 void bdrv_ref(BlockDriverState
*bs
)
5385 /* Release a previously grabbed reference to bs.
5386 * If after releasing, reference count is zero, the BlockDriverState is
5388 void bdrv_unref(BlockDriverState
*bs
)
5393 assert(bs
->refcnt
> 0);
5394 if (--bs
->refcnt
== 0) {
5399 struct BdrvOpBlocker
{
5401 QLIST_ENTRY(BdrvOpBlocker
) list
;
5404 bool bdrv_op_is_blocked(BlockDriverState
*bs
, BlockOpType op
, Error
**errp
)
5406 BdrvOpBlocker
*blocker
;
5407 assert((int) op
>= 0 && op
< BLOCK_OP_TYPE_MAX
);
5408 if (!QLIST_EMPTY(&bs
->op_blockers
[op
])) {
5409 blocker
= QLIST_FIRST(&bs
->op_blockers
[op
]);
5411 error_setg(errp
, "Device '%s' is busy: %s",
5412 bdrv_get_device_name(bs
),
5413 error_get_pretty(blocker
->reason
));
5420 void bdrv_op_block(BlockDriverState
*bs
, BlockOpType op
, Error
*reason
)
5422 BdrvOpBlocker
*blocker
;
5423 assert((int) op
>= 0 && op
< BLOCK_OP_TYPE_MAX
);
5425 blocker
= g_new0(BdrvOpBlocker
, 1);
5426 blocker
->reason
= reason
;
5427 QLIST_INSERT_HEAD(&bs
->op_blockers
[op
], blocker
, list
);
5430 void bdrv_op_unblock(BlockDriverState
*bs
, BlockOpType op
, Error
*reason
)
5432 BdrvOpBlocker
*blocker
, *next
;
5433 assert((int) op
>= 0 && op
< BLOCK_OP_TYPE_MAX
);
5434 QLIST_FOREACH_SAFE(blocker
, &bs
->op_blockers
[op
], list
, next
) {
5435 if (blocker
->reason
== reason
) {
5436 QLIST_REMOVE(blocker
, list
);
5442 void bdrv_op_block_all(BlockDriverState
*bs
, Error
*reason
)
5445 for (i
= 0; i
< BLOCK_OP_TYPE_MAX
; i
++) {
5446 bdrv_op_block(bs
, i
, reason
);
5450 void bdrv_op_unblock_all(BlockDriverState
*bs
, Error
*reason
)
5453 for (i
= 0; i
< BLOCK_OP_TYPE_MAX
; i
++) {
5454 bdrv_op_unblock(bs
, i
, reason
);
5458 bool bdrv_op_blocker_is_empty(BlockDriverState
*bs
)
5462 for (i
= 0; i
< BLOCK_OP_TYPE_MAX
; i
++) {
5463 if (!QLIST_EMPTY(&bs
->op_blockers
[i
])) {
5470 void bdrv_iostatus_enable(BlockDriverState
*bs
)
5472 bs
->iostatus_enabled
= true;
5473 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
5476 /* The I/O status is only enabled if the drive explicitly
5477 * enables it _and_ the VM is configured to stop on errors */
5478 bool bdrv_iostatus_is_enabled(const BlockDriverState
*bs
)
5480 return (bs
->iostatus_enabled
&&
5481 (bs
->on_write_error
== BLOCKDEV_ON_ERROR_ENOSPC
||
5482 bs
->on_write_error
== BLOCKDEV_ON_ERROR_STOP
||
5483 bs
->on_read_error
== BLOCKDEV_ON_ERROR_STOP
));
5486 void bdrv_iostatus_disable(BlockDriverState
*bs
)
5488 bs
->iostatus_enabled
= false;
5491 void bdrv_iostatus_reset(BlockDriverState
*bs
)
5493 if (bdrv_iostatus_is_enabled(bs
)) {
5494 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
5496 block_job_iostatus_reset(bs
->job
);
5501 void bdrv_iostatus_set_err(BlockDriverState
*bs
, int error
)
5503 assert(bdrv_iostatus_is_enabled(bs
));
5504 if (bs
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
5505 bs
->iostatus
= error
== ENOSPC
? BLOCK_DEVICE_IO_STATUS_NOSPACE
:
5506 BLOCK_DEVICE_IO_STATUS_FAILED
;
/*
 * Create a disk image file.
 *
 * Looks up the format driver for @fmt and the protocol driver for
 * @filename, merges both drivers' creation option lists, parses the
 * user-supplied @options string, applies @base_filename/@base_fmt as
 * backing-file options, and finally calls bdrv_create().  If no size was
 * given but a backing file is, the backing file is opened read-only and
 * its length is used as the image size.  Errors are reported through
 * @errp; @quiet suppresses the progress printf.
 *
 * NOTE(review): this extraction has dropped several interior lines
 * (returns, braces, gotos); the code below is kept byte-identical to
 * what is visible.
 */
5510 void bdrv_img_create(const char *filename
, const char *fmt
,
5511 const char *base_filename
, const char *base_fmt
,
5512 char *options
, uint64_t img_size
, int flags
,
5513 Error
**errp
, bool quiet
)
5515 QemuOptsList
*create_opts
= NULL
;
5516 QemuOpts
*opts
= NULL
;
5517 const char *backing_fmt
, *backing_file
;
5519 BlockDriver
*drv
, *proto_drv
;
5520 BlockDriver
*backing_drv
= NULL
;
5521 Error
*local_err
= NULL
;
5524 /* Find driver and parse its options */
5525 drv
= bdrv_find_format(fmt
);
5527 error_setg(errp
, "Unknown file format '%s'", fmt
);
5531 proto_drv
= bdrv_find_protocol(filename
, true);
5533 error_setg(errp
, "Unknown protocol '%s'", filename
);
/* Both the format and the protocol driver must support creation. */
5537 if (!drv
->create_opts
) {
5538 error_setg(errp
, "Format driver '%s' does not support image creation",
5543 if (!proto_drv
->create_opts
) {
5544 error_setg(errp
, "Protocol driver '%s' does not support image creation",
5545 proto_drv
->format_name
);
/* Merge format-level and protocol-level creation options into one list. */
5549 create_opts
= qemu_opts_append(create_opts
, drv
->create_opts
);
5550 create_opts
= qemu_opts_append(create_opts
, proto_drv
->create_opts
);
5552 /* Create parameter list with default values */
5553 opts
= qemu_opts_create(create_opts
, NULL
, 0, &error_abort
);
5554 qemu_opt_set_number(opts
, BLOCK_OPT_SIZE
, img_size
);
5556 /* Parse -o options */
5558 if (qemu_opts_do_parse(opts
, options
, NULL
) != 0) {
5559 error_setg(errp
, "Invalid options for file format '%s'", fmt
);
/* Explicit -b/-F arguments override anything parsed from -o. */
5564 if (base_filename
) {
5565 if (qemu_opt_set(opts
, BLOCK_OPT_BACKING_FILE
, base_filename
)) {
5566 error_setg(errp
, "Backing file not supported for file format '%s'",
5573 if (qemu_opt_set(opts
, BLOCK_OPT_BACKING_FMT
, base_fmt
)) {
5574 error_setg(errp
, "Backing file format not supported for file "
5575 "format '%s'", fmt
);
/* An image must never use itself as its backing file. */
5580 backing_file
= qemu_opt_get(opts
, BLOCK_OPT_BACKING_FILE
);
5582 if (!strcmp(filename
, backing_file
)) {
5583 error_setg(errp
, "Error: Trying to create an image with the "
5584 "same filename as the backing file");
5589 backing_fmt
= qemu_opt_get(opts
, BLOCK_OPT_BACKING_FMT
);
5591 backing_drv
= bdrv_find_format(backing_fmt
);
5593 error_setg(errp
, "Unknown backing file format '%s'",
5599 // The size for the image must always be specified, with one exception:
5600 // If we are using a backing file, we can obtain the size from there
5601 size
= qemu_opt_get_size(opts
, BLOCK_OPT_SIZE
, 0);
5604 BlockDriverState
*bs
;
5608 /* backing files always opened read-only */
5610 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
5613 ret
= bdrv_open(&bs
, backing_file
, NULL
, NULL
, back_flags
,
5614 backing_drv
, &local_err
);
/* Use the backing file's length as the default image size. */
5618 size
= bdrv_getlength(bs
);
5620 error_setg_errno(errp
, -size
, "Could not get size of '%s'",
5626 qemu_opt_set_number(opts
, BLOCK_OPT_SIZE
, size
);
5630 error_setg(errp
, "Image creation needs a size parameter");
/* Progress output — skipped when @quiet (guard dropped by extraction). */
5636 printf("Formatting '%s', fmt=%s ", filename
, fmt
);
5637 qemu_opts_print(opts
);
5641 ret
= bdrv_create(drv
, filename
, opts
, &local_err
);
5643 if (ret
== -EFBIG
) {
5644 /* This is generally a better message than whatever the driver would
5645 * deliver (especially because of the cluster_size_hint), since that
5646 * is most probably not much different from "image too large". */
5647 const char *cluster_size_hint
= "";
5648 if (qemu_opt_get_size(opts
, BLOCK_OPT_CLUSTER_SIZE
, 0)) {
5649 cluster_size_hint
= " (try using a larger cluster size)";
5651 error_setg(errp
, "The image size is too large for file format '%s'"
5652 "%s", fmt
, cluster_size_hint
);
5653 error_free(local_err
);
/* Cleanup path: release option lists; forward any pending error. */
5658 qemu_opts_del(opts
);
5659 qemu_opts_free(create_opts
);
5661 error_propagate(errp
, local_err
);
5665 AioContext
*bdrv_get_aio_context(BlockDriverState
*bs
)
5667 return bs
->aio_context
;
5670 void bdrv_detach_aio_context(BlockDriverState
*bs
)
5672 BdrvAioNotifier
*baf
;
5678 QLIST_FOREACH(baf
, &bs
->aio_notifiers
, list
) {
5679 baf
->detach_aio_context(baf
->opaque
);
5682 if (bs
->io_limits_enabled
) {
5683 throttle_detach_aio_context(&bs
->throttle_state
);
5685 if (bs
->drv
->bdrv_detach_aio_context
) {
5686 bs
->drv
->bdrv_detach_aio_context(bs
);
5689 bdrv_detach_aio_context(bs
->file
);
5691 if (bs
->backing_hd
) {
5692 bdrv_detach_aio_context(bs
->backing_hd
);
5695 bs
->aio_context
= NULL
;
5698 void bdrv_attach_aio_context(BlockDriverState
*bs
,
5699 AioContext
*new_context
)
5701 BdrvAioNotifier
*ban
;
5707 bs
->aio_context
= new_context
;
5709 if (bs
->backing_hd
) {
5710 bdrv_attach_aio_context(bs
->backing_hd
, new_context
);
5713 bdrv_attach_aio_context(bs
->file
, new_context
);
5715 if (bs
->drv
->bdrv_attach_aio_context
) {
5716 bs
->drv
->bdrv_attach_aio_context(bs
, new_context
);
5718 if (bs
->io_limits_enabled
) {
5719 throttle_attach_aio_context(&bs
->throttle_state
, new_context
);
5722 QLIST_FOREACH(ban
, &bs
->aio_notifiers
, list
) {
5723 ban
->attached_aio_context(new_context
, ban
->opaque
);
5727 void bdrv_set_aio_context(BlockDriverState
*bs
, AioContext
*new_context
)
5729 bdrv_drain_all(); /* ensure there are no in-flight requests */
5731 bdrv_detach_aio_context(bs
);
5733 /* This function executes in the old AioContext so acquire the new one in
5734 * case it runs in a different thread.
5736 aio_context_acquire(new_context
);
5737 bdrv_attach_aio_context(bs
, new_context
);
5738 aio_context_release(new_context
);
5741 void bdrv_add_aio_context_notifier(BlockDriverState
*bs
,
5742 void (*attached_aio_context
)(AioContext
*new_context
, void *opaque
),
5743 void (*detach_aio_context
)(void *opaque
), void *opaque
)
5745 BdrvAioNotifier
*ban
= g_new(BdrvAioNotifier
, 1);
5746 *ban
= (BdrvAioNotifier
){
5747 .attached_aio_context
= attached_aio_context
,
5748 .detach_aio_context
= detach_aio_context
,
5752 QLIST_INSERT_HEAD(&bs
->aio_notifiers
, ban
, list
);
5755 void bdrv_remove_aio_context_notifier(BlockDriverState
*bs
,
5756 void (*attached_aio_context
)(AioContext
*,
5758 void (*detach_aio_context
)(void *),
5761 BdrvAioNotifier
*ban
, *ban_next
;
5763 QLIST_FOREACH_SAFE(ban
, &bs
->aio_notifiers
, list
, ban_next
) {
5764 if (ban
->attached_aio_context
== attached_aio_context
&&
5765 ban
->detach_aio_context
== detach_aio_context
&&
5766 ban
->opaque
== opaque
)
5768 QLIST_REMOVE(ban
, list
);
5778 void bdrv_add_before_write_notifier(BlockDriverState
*bs
,
5779 NotifierWithReturn
*notifier
)
5781 notifier_with_return_list_add(&bs
->before_write_notifiers
, notifier
);
5784 int bdrv_amend_options(BlockDriverState
*bs
, QemuOpts
*opts
,
5785 BlockDriverAmendStatusCB
*status_cb
)
5787 if (!bs
->drv
->bdrv_amend_options
) {
5790 return bs
->drv
->bdrv_amend_options(bs
, opts
, status_cb
);
5793 /* This function will be called by the bdrv_recurse_is_first_non_filter method
5794 * of block filter and by bdrv_is_first_non_filter.
5795 * It is used to test if the given bs is the candidate or recurse more in the
5798 bool bdrv_recurse_is_first_non_filter(BlockDriverState
*bs
,
5799 BlockDriverState
*candidate
)
5801 /* return false if basic checks fails */
5802 if (!bs
|| !bs
->drv
) {
5806 /* the code reached a non block filter driver -> check if the bs is
5807 * the same as the candidate. It's the recursion termination condition.
5809 if (!bs
->drv
->is_filter
) {
5810 return bs
== candidate
;
5812 /* Down this path the driver is a block filter driver */
5814 /* If the block filter recursion method is defined use it to recurse down
5817 if (bs
->drv
->bdrv_recurse_is_first_non_filter
) {
5818 return bs
->drv
->bdrv_recurse_is_first_non_filter(bs
, candidate
);
5821 /* the driver is a block filter but don't allow to recurse -> return false
5826 /* This function checks if the candidate is the first non filter bs down it's
5827 * bs chain. Since we don't have pointers to parents it explore all bs chains
5828 * from the top. Some filters can choose not to pass down the recursion.
5830 bool bdrv_is_first_non_filter(BlockDriverState
*candidate
)
5832 BlockDriverState
*bs
;
5834 /* walk down the bs forest recursively */
5835 QTAILQ_FOREACH(bs
, &bdrv_states
, device_list
) {
5838 /* try to recurse in this top level bs */
5839 perm
= bdrv_recurse_is_first_non_filter(bs
, candidate
);
5841 /* candidate is the first non filter */
5850 BlockDriverState
*check_to_replace_node(const char *node_name
, Error
**errp
)
5852 BlockDriverState
*to_replace_bs
= bdrv_find_node(node_name
);
5853 AioContext
*aio_context
;
5855 if (!to_replace_bs
) {
5856 error_setg(errp
, "Node name '%s' not found", node_name
);
5860 aio_context
= bdrv_get_aio_context(to_replace_bs
);
5861 aio_context_acquire(aio_context
);
5863 if (bdrv_op_is_blocked(to_replace_bs
, BLOCK_OP_TYPE_REPLACE
, errp
)) {
5864 to_replace_bs
= NULL
;
5868 /* We don't want arbitrary node of the BDS chain to be replaced only the top
5869 * most non filter in order to prevent data corruption.
5870 * Another benefit is that this tests exclude backing files which are
5871 * blocked by the backing blockers.
5873 if (!bdrv_is_first_non_filter(to_replace_bs
)) {
5874 error_setg(errp
, "Only top most non filter can be replaced");
5875 to_replace_bs
= NULL
;
5880 aio_context_release(aio_context
);
5881 return to_replace_bs
;
5884 void bdrv_io_plug(BlockDriverState
*bs
)
5886 BlockDriver
*drv
= bs
->drv
;
5887 if (drv
&& drv
->bdrv_io_plug
) {
5888 drv
->bdrv_io_plug(bs
);
5889 } else if (bs
->file
) {
5890 bdrv_io_plug(bs
->file
);
5894 void bdrv_io_unplug(BlockDriverState
*bs
)
5896 BlockDriver
*drv
= bs
->drv
;
5897 if (drv
&& drv
->bdrv_io_unplug
) {
5898 drv
->bdrv_io_unplug(bs
);
5899 } else if (bs
->file
) {
5900 bdrv_io_unplug(bs
->file
);
5904 void bdrv_flush_io_queue(BlockDriverState
*bs
)
5906 BlockDriver
*drv
= bs
->drv
;
5907 if (drv
&& drv
->bdrv_flush_io_queue
) {
5908 drv
->bdrv_flush_io_queue(bs
);
5909 } else if (bs
->file
) {
5910 bdrv_flush_io_queue(bs
->file
);
5914 static bool append_open_options(QDict
*d
, BlockDriverState
*bs
)
5916 const QDictEntry
*entry
;
5917 bool found_any
= false;
5919 for (entry
= qdict_first(bs
->options
); entry
;
5920 entry
= qdict_next(bs
->options
, entry
))
5922 /* Only take options for this level and exclude all non-driver-specific
5924 if (!strchr(qdict_entry_key(entry
), '.') &&
5925 strcmp(qdict_entry_key(entry
), "node-name"))
5927 qobject_incref(qdict_entry_value(entry
));
5928 qdict_put_obj(d
, qdict_entry_key(entry
), qdict_entry_value(entry
));
5936 /* Updates the following BDS fields:
5937 * - exact_filename: A filename which may be used for opening a block device
5938 * which (mostly) equals the given BDS (even without any
5939 * other options; so reading and writing must return the same
5940 * results, but caching etc. may be different)
5941 * - full_open_options: Options which, when given when opening a block device
5942 * (without a filename), result in a BDS (mostly)
5943 * equalling the given one
5944 * - filename: If exact_filename is set, it is copied here. Otherwise,
5945 * full_open_options is converted to a JSON object, prefixed with
5946 * "json:" (for use through the JSON pseudo protocol) and put here.
5947 *
5948 * NOTE(review): several interior lines (braces, qdict_new() calls for
5949 * 'opts', early returns) were dropped by the extraction; code below is
5950 * kept byte-identical to what is visible.
5951 */
5948 void bdrv_refresh_filename(BlockDriverState
*bs
)
5950 BlockDriver
*drv
= bs
->drv
;
5957 /* This BDS's file name will most probably depend on its file's name, so
5958 * refresh that first */
5960 bdrv_refresh_filename(bs
->file
);
/* Case 1: the driver knows how to produce its own filename/options. */
5963 if (drv
->bdrv_refresh_filename
) {
5964 /* Obsolete information is of no use here, so drop the old file name
5965 * information before refreshing it */
5966 bs
->exact_filename
[0] = '\0';
5967 if (bs
->full_open_options
) {
5968 QDECREF(bs
->full_open_options
);
5969 bs
->full_open_options
= NULL
;
5972 drv
->bdrv_refresh_filename(bs
);
5973 } else if (bs
->file
) {
5974 /* Try to reconstruct valid information from the underlying file */
5975 bool has_open_options
;
5977 bs
->exact_filename
[0] = '\0';
5978 if (bs
->full_open_options
) {
5979 QDECREF(bs
->full_open_options
);
5980 bs
->full_open_options
= NULL
;
/* 'opts' is created between these lines in the original (not visible
 * here) — presumably a fresh qdict_new(); confirm against upstream. */
5984 has_open_options
= append_open_options(opts
, bs
);
5986 /* If no specific options have been given for this BDS, the filename of
5987 * the underlying file should suffice for this one as well */
5988 if (bs
->file
->exact_filename
[0] && !has_open_options
) {
5989 strcpy(bs
->exact_filename
, bs
->file
->exact_filename
);
5991 /* Reconstructing the full options QDict is simple for most format block
5992 * drivers, as long as the full options are known for the underlying
5993 * file BDS. The full options QDict of that file BDS should somehow
5994 * contain a representation of the filename, therefore the following
5995 * suffices without querying the (exact_)filename of this BDS. */
5996 if (bs
->file
->full_open_options
) {
5997 qdict_put_obj(opts
, "driver",
5998 QOBJECT(qstring_from_str(drv
->format_name
)));
5999 QINCREF(bs
->file
->full_open_options
);
6000 qdict_put_obj(opts
, "file", QOBJECT(bs
->file
->full_open_options
));
6002 bs
->full_open_options
= opts
;
6006 } else if (!bs
->full_open_options
&& qdict_size(bs
->options
)) {
6007 /* There is no underlying file BDS (at least referenced by BDS.file),
6008 * so the full options QDict should be equal to the options given
6009 * specifically for this block device when it was opened (plus the
6010 * driver specification).
6011 * Because those options don't change, there is no need to update
6012 * full_open_options when it's already set. */
6015 append_open_options(opts
, bs
);
6016 qdict_put_obj(opts
, "driver",
6017 QOBJECT(qstring_from_str(drv
->format_name
)));
6019 if (bs
->exact_filename
[0]) {
6020 /* This may not work for all block protocol drivers (some may
6021 * require this filename to be parsed), but we have to find some
6022 * default solution here, so just include it. If some block driver
6023 * does not support pure options without any filename at all or
6024 * needs some special format of the options QDict, it needs to
6025 * implement the driver-specific bdrv_refresh_filename() function.
6027 qdict_put_obj(opts
, "filename",
6028 QOBJECT(qstring_from_str(bs
->exact_filename
)));
6031 bs
->full_open_options
= opts
;
/* Finally derive bs->filename: exact name if known, JSON form otherwise. */
6034 if (bs
->exact_filename
[0]) {
6035 pstrcpy(bs
->filename
, sizeof(bs
->filename
), bs
->exact_filename
);
6036 } else if (bs
->full_open_options
) {
6037 QString
*json
= qobject_to_json(QOBJECT(bs
->full_open_options
));
6038 snprintf(bs
->filename
, sizeof(bs
->filename
), "json:%s",
6039 qstring_get_str(json
));
6044 /* This accessor function purpose is to allow the device models to access the
6045 * BlockAcctStats structure embedded inside a BlockDriverState without being
6046 * aware of the BlockDriverState structure layout.
6047 * It will go away when the BlockAcctStats structure will be moved inside
6048 * the device models.
6050 BlockAcctStats
*bdrv_get_stats(BlockDriverState
*bs
)