2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
24 #include "config-host.h"
25 #include "qemu-common.h"
28 #include "block_int.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
34 #include <sys/types.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
47 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
49 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
);
50 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
51 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
52 BlockDriverCompletionFunc
*cb
, void *opaque
);
53 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
54 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
55 BlockDriverCompletionFunc
*cb
, void *opaque
);
56 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
57 int64_t sector_num
, int nb_sectors
,
59 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
60 int64_t sector_num
, int nb_sectors
,
62 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
63 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
);
64 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
65 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
);
66 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
70 BlockDriverCompletionFunc
*cb
,
73 static void coroutine_fn
bdrv_co_do_rw(void *opaque
);
75 static QTAILQ_HEAD(, BlockDriverState
) bdrv_states
=
76 QTAILQ_HEAD_INITIALIZER(bdrv_states
);
78 static QLIST_HEAD(, BlockDriver
) bdrv_drivers
=
79 QLIST_HEAD_INITIALIZER(bdrv_drivers
);
81 /* The device to use for VM snapshots */
82 static BlockDriverState
*bs_snapshots
;
84 /* If non-zero, use only whitelisted block drivers */
85 static int use_bdrv_whitelist
;
/* Return non-zero iff 'filename' begins with a Windows drive prefix,
 * i.e. a single ASCII letter immediately followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}
95 int is_windows_drive(const char *filename
)
97 if (is_windows_drive_prefix(filename
) &&
100 if (strstart(filename
, "\\\\.\\", NULL
) ||
101 strstart(filename
, "//./", NULL
))
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* Windows drive specs ("c:", "\\.\d:") contain ':' but are not
     * protocol prefixes — NOTE(review): _WIN32 guard assumed, confirm
     * against the surrounding preprocessor structure. */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}
120 int path_is_absolute(const char *path
)
124 /* specific case for names like: "\\.\d:" */
125 if (*path
== '/' || *path
== '\\')
128 p
= strchr(path
, ':');
134 return (*p
== '/' || *p
== '\\');
140 /* if filename is absolute, just copy it to dest. Otherwise, build a
141 path to it by considering it is relative to base_path. URL are
143 void path_combine(char *dest
, int dest_size
,
144 const char *base_path
,
145 const char *filename
)
152 if (path_is_absolute(filename
)) {
153 pstrcpy(dest
, dest_size
, filename
);
155 p
= strchr(base_path
, ':');
160 p1
= strrchr(base_path
, '/');
164 p2
= strrchr(base_path
, '\\');
176 if (len
> dest_size
- 1)
178 memcpy(dest
, base_path
, len
);
180 pstrcat(dest
, dest_size
, filename
);
184 void bdrv_register(BlockDriver
*bdrv
)
186 /* Block drivers without coroutine functions need emulation */
187 if (!bdrv
->bdrv_co_readv
) {
188 bdrv
->bdrv_co_readv
= bdrv_co_readv_em
;
189 bdrv
->bdrv_co_writev
= bdrv_co_writev_em
;
191 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
192 * the block driver lacks aio we need to emulate that too.
194 if (!bdrv
->bdrv_aio_readv
) {
195 /* add AIO emulation layer */
196 bdrv
->bdrv_aio_readv
= bdrv_aio_readv_em
;
197 bdrv
->bdrv_aio_writev
= bdrv_aio_writev_em
;
201 QLIST_INSERT_HEAD(&bdrv_drivers
, bdrv
, list
);
204 /* create a new block device (by default it is empty) */
205 BlockDriverState
*bdrv_new(const char *device_name
)
207 BlockDriverState
*bs
;
209 bs
= g_malloc0(sizeof(BlockDriverState
));
210 pstrcpy(bs
->device_name
, sizeof(bs
->device_name
), device_name
);
211 if (device_name
[0] != '\0') {
212 QTAILQ_INSERT_TAIL(&bdrv_states
, bs
, list
);
214 bdrv_iostatus_disable(bs
);
218 BlockDriver
*bdrv_find_format(const char *format_name
)
221 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
222 if (!strcmp(drv1
->format_name
, format_name
)) {
229 static int bdrv_is_whitelisted(BlockDriver
*drv
)
231 static const char *whitelist
[] = {
232 CONFIG_BDRV_WHITELIST
237 return 1; /* no whitelist, anything goes */
239 for (p
= whitelist
; *p
; p
++) {
240 if (!strcmp(drv
->format_name
, *p
)) {
247 BlockDriver
*bdrv_find_whitelisted_format(const char *format_name
)
249 BlockDriver
*drv
= bdrv_find_format(format_name
);
250 return drv
&& bdrv_is_whitelisted(drv
) ? drv
: NULL
;
253 int bdrv_create(BlockDriver
*drv
, const char* filename
,
254 QEMUOptionParameter
*options
)
256 if (!drv
->bdrv_create
)
259 return drv
->bdrv_create(filename
, options
);
262 int bdrv_create_file(const char* filename
, QEMUOptionParameter
*options
)
266 drv
= bdrv_find_protocol(filename
);
271 return bdrv_create(drv
, filename
, options
);
275 void get_tmp_filename(char *filename
, int size
)
277 char temp_dir
[MAX_PATH
];
279 GetTempPath(MAX_PATH
, temp_dir
);
280 GetTempFileName(temp_dir
, "qem", 0, filename
);
283 void get_tmp_filename(char *filename
, int size
)
287 /* XXX: race condition possible */
288 tmpdir
= getenv("TMPDIR");
291 snprintf(filename
, size
, "%s/vl.XXXXXX", tmpdir
);
292 fd
= mkstemp(filename
);
298 * Detect host devices. By convention, /dev/cdrom[N] is always
299 * recognized as a host CDROM.
301 static BlockDriver
*find_hdev_driver(const char *filename
)
303 int score_max
= 0, score
;
304 BlockDriver
*drv
= NULL
, *d
;
306 QLIST_FOREACH(d
, &bdrv_drivers
, list
) {
307 if (d
->bdrv_probe_device
) {
308 score
= d
->bdrv_probe_device(filename
);
309 if (score
> score_max
) {
319 BlockDriver
*bdrv_find_protocol(const char *filename
)
326 /* TODO Drivers without bdrv_file_open must be specified explicitly */
329 * XXX(hch): we really should not let host device detection
330 * override an explicit protocol specification, but moving this
331 * later breaks access to device names with colons in them.
332 * Thanks to the brain-dead persistent naming schemes on udev-
333 * based Linux systems those actually are quite common.
335 drv1
= find_hdev_driver(filename
);
340 if (!path_has_protocol(filename
)) {
341 return bdrv_find_format("file");
343 p
= strchr(filename
, ':');
346 if (len
> sizeof(protocol
) - 1)
347 len
= sizeof(protocol
) - 1;
348 memcpy(protocol
, filename
, len
);
349 protocol
[len
] = '\0';
350 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
351 if (drv1
->protocol_name
&&
352 !strcmp(drv1
->protocol_name
, protocol
)) {
359 static int find_image_format(const char *filename
, BlockDriver
**pdrv
)
361 int ret
, score
, score_max
;
362 BlockDriver
*drv1
, *drv
;
364 BlockDriverState
*bs
;
366 ret
= bdrv_file_open(&bs
, filename
, 0);
372 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
373 if (bs
->sg
|| !bdrv_is_inserted(bs
)) {
375 drv
= bdrv_find_format("raw");
383 ret
= bdrv_pread(bs
, 0, buf
, sizeof(buf
));
392 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
393 if (drv1
->bdrv_probe
) {
394 score
= drv1
->bdrv_probe(buf
, ret
, filename
);
395 if (score
> score_max
) {
409 * Set the current 'total_sectors' value
411 static int refresh_total_sectors(BlockDriverState
*bs
, int64_t hint
)
413 BlockDriver
*drv
= bs
->drv
;
415 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
419 /* query actual device if possible, otherwise just trust the hint */
420 if (drv
->bdrv_getlength
) {
421 int64_t length
= drv
->bdrv_getlength(bs
);
425 hint
= length
>> BDRV_SECTOR_BITS
;
428 bs
->total_sectors
= hint
;
433 * Set open flags for a given cache mode
435 * Return 0 on success, -1 if the cache mode was invalid.
437 int bdrv_parse_cache_flags(const char *mode
, int *flags
)
439 *flags
&= ~BDRV_O_CACHE_MASK
;
441 if (!strcmp(mode
, "off") || !strcmp(mode
, "none")) {
442 *flags
|= BDRV_O_NOCACHE
| BDRV_O_CACHE_WB
;
443 } else if (!strcmp(mode
, "directsync")) {
444 *flags
|= BDRV_O_NOCACHE
;
445 } else if (!strcmp(mode
, "writeback")) {
446 *flags
|= BDRV_O_CACHE_WB
;
447 } else if (!strcmp(mode
, "unsafe")) {
448 *flags
|= BDRV_O_CACHE_WB
;
449 *flags
|= BDRV_O_NO_FLUSH
;
450 } else if (!strcmp(mode
, "writethrough")) {
451 /* this is the default */
460 * Common part for opening disk images and files
462 static int bdrv_open_common(BlockDriverState
*bs
, const char *filename
,
463 int flags
, BlockDriver
*drv
)
469 trace_bdrv_open_common(bs
, filename
, flags
, drv
->format_name
);
472 bs
->total_sectors
= 0;
475 bs
->open_flags
= flags
;
476 bs
->buffer_alignment
= 512;
478 pstrcpy(bs
->filename
, sizeof(bs
->filename
), filename
);
480 if (use_bdrv_whitelist
&& !bdrv_is_whitelisted(drv
)) {
485 bs
->opaque
= g_malloc0(drv
->instance_size
);
487 if (flags
& BDRV_O_CACHE_WB
)
488 bs
->enable_write_cache
= 1;
491 * Clear flags that are internal to the block layer before opening the
494 open_flags
= flags
& ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
497 * Snapshots should be writable.
499 if (bs
->is_temporary
) {
500 open_flags
|= BDRV_O_RDWR
;
503 /* Open the image, either directly or using a protocol */
504 if (drv
->bdrv_file_open
) {
505 ret
= drv
->bdrv_file_open(bs
, filename
, open_flags
);
507 ret
= bdrv_file_open(&bs
->file
, filename
, open_flags
);
509 ret
= drv
->bdrv_open(bs
, open_flags
);
517 bs
->keep_read_only
= bs
->read_only
= !(open_flags
& BDRV_O_RDWR
);
519 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
525 if (bs
->is_temporary
) {
533 bdrv_delete(bs
->file
);
543 * Opens a file using a protocol (file, host_device, nbd, ...)
545 int bdrv_file_open(BlockDriverState
**pbs
, const char *filename
, int flags
)
547 BlockDriverState
*bs
;
551 drv
= bdrv_find_protocol(filename
);
557 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
568 * Opens a disk image (raw, qcow2, vmdk, ...)
570 int bdrv_open(BlockDriverState
*bs
, const char *filename
, int flags
,
575 if (flags
& BDRV_O_SNAPSHOT
) {
576 BlockDriverState
*bs1
;
579 BlockDriver
*bdrv_qcow2
;
580 QEMUOptionParameter
*options
;
581 char tmp_filename
[PATH_MAX
];
582 char backing_filename
[PATH_MAX
];
584 /* if snapshot, we create a temporary backing file and open it
585 instead of opening 'filename' directly */
587 /* if there is a backing file, use it */
589 ret
= bdrv_open(bs1
, filename
, 0, drv
);
594 total_size
= bdrv_getlength(bs1
) & BDRV_SECTOR_MASK
;
596 if (bs1
->drv
&& bs1
->drv
->protocol_name
)
601 get_tmp_filename(tmp_filename
, sizeof(tmp_filename
));
603 /* Real path is meaningless for protocols */
605 snprintf(backing_filename
, sizeof(backing_filename
),
607 else if (!realpath(filename
, backing_filename
))
610 bdrv_qcow2
= bdrv_find_format("qcow2");
611 options
= parse_option_parameters("", bdrv_qcow2
->create_options
, NULL
);
613 set_option_parameter_int(options
, BLOCK_OPT_SIZE
, total_size
);
614 set_option_parameter(options
, BLOCK_OPT_BACKING_FILE
, backing_filename
);
616 set_option_parameter(options
, BLOCK_OPT_BACKING_FMT
,
620 ret
= bdrv_create(bdrv_qcow2
, tmp_filename
, options
);
621 free_option_parameters(options
);
626 filename
= tmp_filename
;
628 bs
->is_temporary
= 1;
631 /* Find the right image format driver */
633 ret
= find_image_format(filename
, &drv
);
637 goto unlink_and_fail
;
641 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
643 goto unlink_and_fail
;
646 /* If there is a backing file, use it */
647 if ((flags
& BDRV_O_NO_BACKING
) == 0 && bs
->backing_file
[0] != '\0') {
648 char backing_filename
[PATH_MAX
];
650 BlockDriver
*back_drv
= NULL
;
652 bs
->backing_hd
= bdrv_new("");
654 if (path_has_protocol(bs
->backing_file
)) {
655 pstrcpy(backing_filename
, sizeof(backing_filename
),
658 path_combine(backing_filename
, sizeof(backing_filename
),
659 filename
, bs
->backing_file
);
662 if (bs
->backing_format
[0] != '\0') {
663 back_drv
= bdrv_find_format(bs
->backing_format
);
666 /* backing files always opened read-only */
668 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
670 ret
= bdrv_open(bs
->backing_hd
, backing_filename
, back_flags
, back_drv
);
675 if (bs
->is_temporary
) {
676 bs
->backing_hd
->keep_read_only
= !(flags
& BDRV_O_RDWR
);
678 /* base image inherits from "parent" */
679 bs
->backing_hd
->keep_read_only
= bs
->keep_read_only
;
683 if (!bdrv_key_required(bs
)) {
684 bdrv_dev_change_media_cb(bs
, true);
690 if (bs
->is_temporary
) {
696 void bdrv_close(BlockDriverState
*bs
)
699 if (bs
== bs_snapshots
) {
702 if (bs
->backing_hd
) {
703 bdrv_delete(bs
->backing_hd
);
704 bs
->backing_hd
= NULL
;
706 bs
->drv
->bdrv_close(bs
);
709 if (bs
->is_temporary
) {
710 unlink(bs
->filename
);
716 if (bs
->file
!= NULL
) {
717 bdrv_close(bs
->file
);
720 bdrv_dev_change_media_cb(bs
, false);
724 void bdrv_close_all(void)
726 BlockDriverState
*bs
;
728 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
733 /* make a BlockDriverState anonymous by removing from bdrv_state list.
734 Also, NULL terminate the device_name to prevent double remove */
735 void bdrv_make_anon(BlockDriverState
*bs
)
737 if (bs
->device_name
[0] != '\0') {
738 QTAILQ_REMOVE(&bdrv_states
, bs
, list
);
740 bs
->device_name
[0] = '\0';
743 void bdrv_delete(BlockDriverState
*bs
)
747 /* remove from list, if necessary */
751 if (bs
->file
!= NULL
) {
752 bdrv_delete(bs
->file
);
755 assert(bs
!= bs_snapshots
);
759 int bdrv_attach_dev(BlockDriverState
*bs
, void *dev
)
760 /* TODO change to DeviceState *dev when all users are qdevified */
766 bdrv_iostatus_reset(bs
);
770 /* TODO qdevified devices don't use this, remove when devices are qdevified */
771 void bdrv_attach_dev_nofail(BlockDriverState
*bs
, void *dev
)
773 if (bdrv_attach_dev(bs
, dev
) < 0) {
778 void bdrv_detach_dev(BlockDriverState
*bs
, void *dev
)
779 /* TODO change to DeviceState *dev when all users are qdevified */
781 assert(bs
->dev
== dev
);
784 bs
->dev_opaque
= NULL
;
785 bs
->buffer_alignment
= 512;
788 /* TODO change to return DeviceState * when all users are qdevified */
789 void *bdrv_get_attached_dev(BlockDriverState
*bs
)
794 void bdrv_set_dev_ops(BlockDriverState
*bs
, const BlockDevOps
*ops
,
798 bs
->dev_opaque
= opaque
;
799 if (bdrv_dev_has_removable_media(bs
) && bs
== bs_snapshots
) {
804 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
)
806 if (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
) {
807 bs
->dev_ops
->change_media_cb(bs
->dev_opaque
, load
);
811 bool bdrv_dev_has_removable_media(BlockDriverState
*bs
)
813 return !bs
->dev
|| (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
);
816 bool bdrv_dev_is_tray_open(BlockDriverState
*bs
)
818 if (bs
->dev_ops
&& bs
->dev_ops
->is_tray_open
) {
819 return bs
->dev_ops
->is_tray_open(bs
->dev_opaque
);
824 static void bdrv_dev_resize_cb(BlockDriverState
*bs
)
826 if (bs
->dev_ops
&& bs
->dev_ops
->resize_cb
) {
827 bs
->dev_ops
->resize_cb(bs
->dev_opaque
);
831 bool bdrv_dev_is_medium_locked(BlockDriverState
*bs
)
833 if (bs
->dev_ops
&& bs
->dev_ops
->is_medium_locked
) {
834 return bs
->dev_ops
->is_medium_locked(bs
->dev_opaque
);
840 * Run consistency checks on an image
842 * Returns 0 if the check could be completed (it doesn't mean that the image is
843 * free of errors) or -errno when an internal error occurred. The results of the
844 * check are stored in res.
846 int bdrv_check(BlockDriverState
*bs
, BdrvCheckResult
*res
)
848 if (bs
->drv
->bdrv_check
== NULL
) {
852 memset(res
, 0, sizeof(*res
));
853 return bs
->drv
->bdrv_check(bs
, res
);
856 #define COMMIT_BUF_SECTORS 2048
858 /* commit COW file into the raw image */
859 int bdrv_commit(BlockDriverState
*bs
)
861 BlockDriver
*drv
= bs
->drv
;
862 BlockDriver
*backing_drv
;
863 int64_t sector
, total_sectors
;
864 int n
, ro
, open_flags
;
865 int ret
= 0, rw_ret
= 0;
868 BlockDriverState
*bs_rw
, *bs_ro
;
873 if (!bs
->backing_hd
) {
877 if (bs
->backing_hd
->keep_read_only
) {
881 backing_drv
= bs
->backing_hd
->drv
;
882 ro
= bs
->backing_hd
->read_only
;
883 strncpy(filename
, bs
->backing_hd
->filename
, sizeof(filename
));
884 open_flags
= bs
->backing_hd
->open_flags
;
888 bdrv_delete(bs
->backing_hd
);
889 bs
->backing_hd
= NULL
;
890 bs_rw
= bdrv_new("");
891 rw_ret
= bdrv_open(bs_rw
, filename
, open_flags
| BDRV_O_RDWR
,
895 /* try to re-open read-only */
896 bs_ro
= bdrv_new("");
897 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
901 /* drive not functional anymore */
905 bs
->backing_hd
= bs_ro
;
908 bs
->backing_hd
= bs_rw
;
911 total_sectors
= bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
;
912 buf
= g_malloc(COMMIT_BUF_SECTORS
* BDRV_SECTOR_SIZE
);
914 for (sector
= 0; sector
< total_sectors
; sector
+= n
) {
915 if (drv
->bdrv_is_allocated(bs
, sector
, COMMIT_BUF_SECTORS
, &n
)) {
917 if (bdrv_read(bs
, sector
, buf
, n
) != 0) {
922 if (bdrv_write(bs
->backing_hd
, sector
, buf
, n
) != 0) {
929 if (drv
->bdrv_make_empty
) {
930 ret
= drv
->bdrv_make_empty(bs
);
935 * Make sure all data we wrote to the backing device is actually
939 bdrv_flush(bs
->backing_hd
);
946 bdrv_delete(bs
->backing_hd
);
947 bs
->backing_hd
= NULL
;
948 bs_ro
= bdrv_new("");
949 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
953 /* drive not functional anymore */
957 bs
->backing_hd
= bs_ro
;
958 bs
->backing_hd
->keep_read_only
= 0;
964 void bdrv_commit_all(void)
966 BlockDriverState
*bs
;
968 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
976 * -EINVAL - backing format specified, but no file
977 * -ENOSPC - can't update the backing file because no space is left in the
979 * -ENOTSUP - format driver doesn't support changing the backing file
981 int bdrv_change_backing_file(BlockDriverState
*bs
,
982 const char *backing_file
, const char *backing_fmt
)
984 BlockDriver
*drv
= bs
->drv
;
986 if (drv
->bdrv_change_backing_file
!= NULL
) {
987 return drv
->bdrv_change_backing_file(bs
, backing_file
, backing_fmt
);
993 static int bdrv_check_byte_request(BlockDriverState
*bs
, int64_t offset
,
998 if (!bdrv_is_inserted(bs
))
1004 len
= bdrv_getlength(bs
);
1009 if ((offset
> len
) || (len
- offset
< size
))
1015 static int bdrv_check_request(BlockDriverState
*bs
, int64_t sector_num
,
1018 return bdrv_check_byte_request(bs
, sector_num
* BDRV_SECTOR_SIZE
,
1019 nb_sectors
* BDRV_SECTOR_SIZE
);
1022 typedef struct RwCo
{
1023 BlockDriverState
*bs
;
1031 static void coroutine_fn
bdrv_rw_co_entry(void *opaque
)
1033 RwCo
*rwco
= opaque
;
1035 if (!rwco
->is_write
) {
1036 rwco
->ret
= bdrv_co_do_readv(rwco
->bs
, rwco
->sector_num
,
1037 rwco
->nb_sectors
, rwco
->qiov
);
1039 rwco
->ret
= bdrv_co_do_writev(rwco
->bs
, rwco
->sector_num
,
1040 rwco
->nb_sectors
, rwco
->qiov
);
1045 * Process a synchronous request using coroutines
1047 static int bdrv_rw_co(BlockDriverState
*bs
, int64_t sector_num
, uint8_t *buf
,
1048 int nb_sectors
, bool is_write
)
1051 struct iovec iov
= {
1052 .iov_base
= (void *)buf
,
1053 .iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
,
1058 .sector_num
= sector_num
,
1059 .nb_sectors
= nb_sectors
,
1061 .is_write
= is_write
,
1065 qemu_iovec_init_external(&qiov
, &iov
, 1);
1067 if (qemu_in_coroutine()) {
1068 /* Fast-path if already in coroutine context */
1069 bdrv_rw_co_entry(&rwco
);
1071 co
= qemu_coroutine_create(bdrv_rw_co_entry
);
1072 qemu_coroutine_enter(co
, &rwco
);
1073 while (rwco
.ret
== NOT_DONE
) {
1080 /* return < 0 if error. See bdrv_write() for the return codes */
1081 int bdrv_read(BlockDriverState
*bs
, int64_t sector_num
,
1082 uint8_t *buf
, int nb_sectors
)
1084 return bdrv_rw_co(bs
, sector_num
, buf
, nb_sectors
, false);
1087 static void set_dirty_bitmap(BlockDriverState
*bs
, int64_t sector_num
,
1088 int nb_sectors
, int dirty
)
1091 unsigned long val
, idx
, bit
;
1093 start
= sector_num
/ BDRV_SECTORS_PER_DIRTY_CHUNK
;
1094 end
= (sector_num
+ nb_sectors
- 1) / BDRV_SECTORS_PER_DIRTY_CHUNK
;
1096 for (; start
<= end
; start
++) {
1097 idx
= start
/ (sizeof(unsigned long) * 8);
1098 bit
= start
% (sizeof(unsigned long) * 8);
1099 val
= bs
->dirty_bitmap
[idx
];
1101 if (!(val
& (1UL << bit
))) {
1106 if (val
& (1UL << bit
)) {
1108 val
&= ~(1UL << bit
);
1111 bs
->dirty_bitmap
[idx
] = val
;
1115 /* Return < 0 if error. Important errors are:
1116 -EIO generic I/O error (may happen for all errors)
1117 -ENOMEDIUM No media inserted.
1118 -EINVAL Invalid sector number or nb_sectors
1119 -EACCES Trying to write a read-only device
1121 int bdrv_write(BlockDriverState
*bs
, int64_t sector_num
,
1122 const uint8_t *buf
, int nb_sectors
)
1124 return bdrv_rw_co(bs
, sector_num
, (uint8_t *)buf
, nb_sectors
, true);
1127 int bdrv_pread(BlockDriverState
*bs
, int64_t offset
,
1128 void *buf
, int count1
)
1130 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1131 int len
, nb_sectors
, count
;
1136 /* first read to align to sector start */
1137 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1140 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1142 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1144 memcpy(buf
, tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), len
);
1152 /* read the sectors "in place" */
1153 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1154 if (nb_sectors
> 0) {
1155 if ((ret
= bdrv_read(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1157 sector_num
+= nb_sectors
;
1158 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1163 /* add data from the last sector */
1165 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1167 memcpy(buf
, tmp_buf
, count
);
1172 int bdrv_pwrite(BlockDriverState
*bs
, int64_t offset
,
1173 const void *buf
, int count1
)
1175 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1176 int len
, nb_sectors
, count
;
1181 /* first write to align to sector start */
1182 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1185 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1187 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1189 memcpy(tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), buf
, len
);
1190 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1199 /* write the sectors "in place" */
1200 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1201 if (nb_sectors
> 0) {
1202 if ((ret
= bdrv_write(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1204 sector_num
+= nb_sectors
;
1205 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1210 /* add data from the last sector */
1212 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1214 memcpy(tmp_buf
, buf
, count
);
1215 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1222 * Writes to the file and ensures that no writes are reordered across this
1223 * request (acts as a barrier)
1225 * Returns 0 on success, -errno in error cases.
1227 int bdrv_pwrite_sync(BlockDriverState
*bs
, int64_t offset
,
1228 const void *buf
, int count
)
1232 ret
= bdrv_pwrite(bs
, offset
, buf
, count
);
1237 /* No flush needed for cache modes that use O_DSYNC */
1238 if ((bs
->open_flags
& BDRV_O_CACHE_WB
) != 0) {
1246 * Handle a read request in coroutine context
1248 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
1249 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1251 BlockDriver
*drv
= bs
->drv
;
1256 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1260 return drv
->bdrv_co_readv(bs
, sector_num
, nb_sectors
, qiov
);
1263 int coroutine_fn
bdrv_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
1264 int nb_sectors
, QEMUIOVector
*qiov
)
1266 trace_bdrv_co_readv(bs
, sector_num
, nb_sectors
);
1268 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
);
1272 * Handle a write request in coroutine context
1274 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
1275 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1277 BlockDriver
*drv
= bs
->drv
;
1283 if (bs
->read_only
) {
1286 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1290 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, qiov
);
1292 if (bs
->dirty_bitmap
) {
1293 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
1296 if (bs
->wr_highest_sector
< sector_num
+ nb_sectors
- 1) {
1297 bs
->wr_highest_sector
= sector_num
+ nb_sectors
- 1;
1303 int coroutine_fn
bdrv_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
1304 int nb_sectors
, QEMUIOVector
*qiov
)
1306 trace_bdrv_co_writev(bs
, sector_num
, nb_sectors
);
1308 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, qiov
);
1312 * Truncate file to 'offset' bytes (needed only for file protocols)
1314 int bdrv_truncate(BlockDriverState
*bs
, int64_t offset
)
1316 BlockDriver
*drv
= bs
->drv
;
1320 if (!drv
->bdrv_truncate
)
1324 if (bdrv_in_use(bs
))
1326 ret
= drv
->bdrv_truncate(bs
, offset
);
1328 ret
= refresh_total_sectors(bs
, offset
>> BDRV_SECTOR_BITS
);
1329 bdrv_dev_resize_cb(bs
);
1335 * Length of a allocated file in bytes. Sparse files are counted by actual
1336 * allocated space. Return < 0 if error or unknown.
1338 int64_t bdrv_get_allocated_file_size(BlockDriverState
*bs
)
1340 BlockDriver
*drv
= bs
->drv
;
1344 if (drv
->bdrv_get_allocated_file_size
) {
1345 return drv
->bdrv_get_allocated_file_size(bs
);
1348 return bdrv_get_allocated_file_size(bs
->file
);
1354 * Length of a file in bytes. Return < 0 if error or unknown.
1356 int64_t bdrv_getlength(BlockDriverState
*bs
)
1358 BlockDriver
*drv
= bs
->drv
;
1362 if (bs
->growable
|| bdrv_dev_has_removable_media(bs
)) {
1363 if (drv
->bdrv_getlength
) {
1364 return drv
->bdrv_getlength(bs
);
1367 return bs
->total_sectors
* BDRV_SECTOR_SIZE
;
1370 /* return 0 as number of sectors if no device present or error */
1371 void bdrv_get_geometry(BlockDriverState
*bs
, uint64_t *nb_sectors_ptr
)
1374 length
= bdrv_getlength(bs
);
1378 length
= length
>> BDRV_SECTOR_BITS
;
1379 *nb_sectors_ptr
= length
;
1383 uint8_t boot_ind
; /* 0x80 - active */
1384 uint8_t head
; /* starting head */
1385 uint8_t sector
; /* starting sector */
1386 uint8_t cyl
; /* starting cylinder */
1387 uint8_t sys_ind
; /* What partition type */
1388 uint8_t end_head
; /* end head */
1389 uint8_t end_sector
; /* end sector */
1390 uint8_t end_cyl
; /* end cylinder */
1391 uint32_t start_sect
; /* starting sector counting from 0 */
1392 uint32_t nr_sects
; /* nr of sectors in partition */
1395 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1396 static int guess_disk_lchs(BlockDriverState
*bs
,
1397 int *pcylinders
, int *pheads
, int *psectors
)
1399 uint8_t buf
[BDRV_SECTOR_SIZE
];
1400 int ret
, i
, heads
, sectors
, cylinders
;
1401 struct partition
*p
;
1403 uint64_t nb_sectors
;
1405 bdrv_get_geometry(bs
, &nb_sectors
);
1407 ret
= bdrv_read(bs
, 0, buf
, 1);
1410 /* test msdos magic */
1411 if (buf
[510] != 0x55 || buf
[511] != 0xaa)
1413 for(i
= 0; i
< 4; i
++) {
1414 p
= ((struct partition
*)(buf
+ 0x1be)) + i
;
1415 nr_sects
= le32_to_cpu(p
->nr_sects
);
1416 if (nr_sects
&& p
->end_head
) {
1417 /* We make the assumption that the partition terminates on
1418 a cylinder boundary */
1419 heads
= p
->end_head
+ 1;
1420 sectors
= p
->end_sector
& 63;
1423 cylinders
= nb_sectors
/ (heads
* sectors
);
1424 if (cylinders
< 1 || cylinders
> 16383)
1427 *psectors
= sectors
;
1428 *pcylinders
= cylinders
;
1430 printf("guessed geometry: LCHS=%d %d %d\n",
1431 cylinders
, heads
, sectors
);
1439 void bdrv_guess_geometry(BlockDriverState
*bs
, int *pcyls
, int *pheads
, int *psecs
)
1441 int translation
, lba_detected
= 0;
1442 int cylinders
, heads
, secs
;
1443 uint64_t nb_sectors
;
1445 /* if a geometry hint is available, use it */
1446 bdrv_get_geometry(bs
, &nb_sectors
);
1447 bdrv_get_geometry_hint(bs
, &cylinders
, &heads
, &secs
);
1448 translation
= bdrv_get_translation_hint(bs
);
1449 if (cylinders
!= 0) {
1454 if (guess_disk_lchs(bs
, &cylinders
, &heads
, &secs
) == 0) {
1456 /* if heads > 16, it means that a BIOS LBA
1457 translation was active, so the default
1458 hardware geometry is OK */
1460 goto default_geometry
;
1465 /* disable any translation to be in sync with
1466 the logical geometry */
1467 if (translation
== BIOS_ATA_TRANSLATION_AUTO
) {
1468 bdrv_set_translation_hint(bs
,
1469 BIOS_ATA_TRANSLATION_NONE
);
1474 /* if no geometry, use a standard physical disk geometry */
1475 cylinders
= nb_sectors
/ (16 * 63);
1477 if (cylinders
> 16383)
1479 else if (cylinders
< 2)
1484 if ((lba_detected
== 1) && (translation
== BIOS_ATA_TRANSLATION_AUTO
)) {
1485 if ((*pcyls
* *pheads
) <= 131072) {
1486 bdrv_set_translation_hint(bs
,
1487 BIOS_ATA_TRANSLATION_LARGE
);
1489 bdrv_set_translation_hint(bs
,
1490 BIOS_ATA_TRANSLATION_LBA
);
1494 bdrv_set_geometry_hint(bs
, *pcyls
, *pheads
, *psecs
);
1498 void bdrv_set_geometry_hint(BlockDriverState
*bs
,
1499 int cyls
, int heads
, int secs
)
1506 void bdrv_set_translation_hint(BlockDriverState
*bs
, int translation
)
1508 bs
->translation
= translation
;
1511 void bdrv_get_geometry_hint(BlockDriverState
*bs
,
1512 int *pcyls
, int *pheads
, int *psecs
)
1515 *pheads
= bs
->heads
;
1519 /* Recognize floppy formats */
1520 typedef struct FDFormat
{
1527 static const FDFormat fd_formats
[] = {
1528 /* First entry is default format */
1529 /* 1.44 MB 3"1/2 floppy disks */
1530 { FDRIVE_DRV_144
, 18, 80, 1, },
1531 { FDRIVE_DRV_144
, 20, 80, 1, },
1532 { FDRIVE_DRV_144
, 21, 80, 1, },
1533 { FDRIVE_DRV_144
, 21, 82, 1, },
1534 { FDRIVE_DRV_144
, 21, 83, 1, },
1535 { FDRIVE_DRV_144
, 22, 80, 1, },
1536 { FDRIVE_DRV_144
, 23, 80, 1, },
1537 { FDRIVE_DRV_144
, 24, 80, 1, },
1538 /* 2.88 MB 3"1/2 floppy disks */
1539 { FDRIVE_DRV_288
, 36, 80, 1, },
1540 { FDRIVE_DRV_288
, 39, 80, 1, },
1541 { FDRIVE_DRV_288
, 40, 80, 1, },
1542 { FDRIVE_DRV_288
, 44, 80, 1, },
1543 { FDRIVE_DRV_288
, 48, 80, 1, },
1544 /* 720 kB 3"1/2 floppy disks */
1545 { FDRIVE_DRV_144
, 9, 80, 1, },
1546 { FDRIVE_DRV_144
, 10, 80, 1, },
1547 { FDRIVE_DRV_144
, 10, 82, 1, },
1548 { FDRIVE_DRV_144
, 10, 83, 1, },
1549 { FDRIVE_DRV_144
, 13, 80, 1, },
1550 { FDRIVE_DRV_144
, 14, 80, 1, },
1551 /* 1.2 MB 5"1/4 floppy disks */
1552 { FDRIVE_DRV_120
, 15, 80, 1, },
1553 { FDRIVE_DRV_120
, 18, 80, 1, },
1554 { FDRIVE_DRV_120
, 18, 82, 1, },
1555 { FDRIVE_DRV_120
, 18, 83, 1, },
1556 { FDRIVE_DRV_120
, 20, 80, 1, },
1557 /* 720 kB 5"1/4 floppy disks */
1558 { FDRIVE_DRV_120
, 9, 80, 1, },
1559 { FDRIVE_DRV_120
, 11, 80, 1, },
1560 /* 360 kB 5"1/4 floppy disks */
1561 { FDRIVE_DRV_120
, 9, 40, 1, },
1562 { FDRIVE_DRV_120
, 9, 40, 0, },
1563 { FDRIVE_DRV_120
, 10, 41, 1, },
1564 { FDRIVE_DRV_120
, 10, 42, 1, },
1565 /* 320 kB 5"1/4 floppy disks */
1566 { FDRIVE_DRV_120
, 8, 40, 1, },
1567 { FDRIVE_DRV_120
, 8, 40, 0, },
1568 /* 360 kB must match 5"1/4 better than 3"1/2... */
1569 { FDRIVE_DRV_144
, 9, 80, 0, },
1571 { FDRIVE_DRV_NONE
, -1, -1, 0, },
1574 void bdrv_get_floppy_geometry_hint(BlockDriverState
*bs
, int *nb_heads
,
1575 int *max_track
, int *last_sect
,
1576 FDriveType drive_in
, FDriveType
*drive
)
1578 const FDFormat
*parse
;
1579 uint64_t nb_sectors
, size
;
1580 int i
, first_match
, match
;
1582 bdrv_get_geometry_hint(bs
, nb_heads
, max_track
, last_sect
);
1583 if (*nb_heads
!= 0 && *max_track
!= 0 && *last_sect
!= 0) {
1584 /* User defined disk */
1586 bdrv_get_geometry(bs
, &nb_sectors
);
1589 for (i
= 0; ; i
++) {
1590 parse
= &fd_formats
[i
];
1591 if (parse
->drive
== FDRIVE_DRV_NONE
) {
1594 if (drive_in
== parse
->drive
||
1595 drive_in
== FDRIVE_DRV_NONE
) {
1596 size
= (parse
->max_head
+ 1) * parse
->max_track
*
1598 if (nb_sectors
== size
) {
1602 if (first_match
== -1) {
1608 if (first_match
== -1) {
1611 match
= first_match
;
1613 parse
= &fd_formats
[match
];
1615 *nb_heads
= parse
->max_head
+ 1;
1616 *max_track
= parse
->max_track
;
1617 *last_sect
= parse
->last_sect
;
1618 *drive
= parse
->drive
;
1622 int bdrv_get_translation_hint(BlockDriverState
*bs
)
1624 return bs
->translation
;
1627 void bdrv_set_on_error(BlockDriverState
*bs
, BlockErrorAction on_read_error
,
1628 BlockErrorAction on_write_error
)
1630 bs
->on_read_error
= on_read_error
;
1631 bs
->on_write_error
= on_write_error
;
1634 BlockErrorAction
bdrv_get_on_error(BlockDriverState
*bs
, int is_read
)
1636 return is_read
? bs
->on_read_error
: bs
->on_write_error
;
1639 int bdrv_is_read_only(BlockDriverState
*bs
)
1641 return bs
->read_only
;
1644 int bdrv_is_sg(BlockDriverState
*bs
)
1649 int bdrv_enable_write_cache(BlockDriverState
*bs
)
1651 return bs
->enable_write_cache
;
1654 int bdrv_is_encrypted(BlockDriverState
*bs
)
1656 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
1658 return bs
->encrypted
;
1661 int bdrv_key_required(BlockDriverState
*bs
)
1663 BlockDriverState
*backing_hd
= bs
->backing_hd
;
1665 if (backing_hd
&& backing_hd
->encrypted
&& !backing_hd
->valid_key
)
1667 return (bs
->encrypted
&& !bs
->valid_key
);
1670 int bdrv_set_key(BlockDriverState
*bs
, const char *key
)
1673 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
) {
1674 ret
= bdrv_set_key(bs
->backing_hd
, key
);
1680 if (!bs
->encrypted
) {
1682 } else if (!bs
->drv
|| !bs
->drv
->bdrv_set_key
) {
1685 ret
= bs
->drv
->bdrv_set_key(bs
, key
);
1688 } else if (!bs
->valid_key
) {
1690 /* call the change callback now, we skipped it on open */
1691 bdrv_dev_change_media_cb(bs
, true);
1696 void bdrv_get_format(BlockDriverState
*bs
, char *buf
, int buf_size
)
1701 pstrcpy(buf
, buf_size
, bs
->drv
->format_name
);
1705 void bdrv_iterate_format(void (*it
)(void *opaque
, const char *name
),
1710 QLIST_FOREACH(drv
, &bdrv_drivers
, list
) {
1711 it(opaque
, drv
->format_name
);
1715 BlockDriverState
*bdrv_find(const char *name
)
1717 BlockDriverState
*bs
;
1719 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1720 if (!strcmp(name
, bs
->device_name
)) {
1727 BlockDriverState
*bdrv_next(BlockDriverState
*bs
)
1730 return QTAILQ_FIRST(&bdrv_states
);
1732 return QTAILQ_NEXT(bs
, list
);
1735 void bdrv_iterate(void (*it
)(void *opaque
, BlockDriverState
*bs
), void *opaque
)
1737 BlockDriverState
*bs
;
1739 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1744 const char *bdrv_get_device_name(BlockDriverState
*bs
)
1746 return bs
->device_name
;
1749 void bdrv_flush_all(void)
1751 BlockDriverState
*bs
;
1753 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1754 if (!bdrv_is_read_only(bs
) && bdrv_is_inserted(bs
)) {
1760 int bdrv_has_zero_init(BlockDriverState
*bs
)
1764 if (bs
->drv
->bdrv_has_zero_init
) {
1765 return bs
->drv
->bdrv_has_zero_init(bs
);
1772 * Returns true iff the specified sector is present in the disk image. Drivers
1773 * not implementing the functionality are assumed to not support backing files,
1774 * hence all their sectors are reported as allocated.
1776 * 'pnum' is set to the number of sectors (including and immediately following
1777 * the specified sector) that are known to be in the same
1778 * allocated/unallocated state.
1780 * 'nb_sectors' is the max value 'pnum' should be set to.
1782 int bdrv_is_allocated(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
,
1786 if (!bs
->drv
->bdrv_is_allocated
) {
1787 if (sector_num
>= bs
->total_sectors
) {
1791 n
= bs
->total_sectors
- sector_num
;
1792 *pnum
= (n
< nb_sectors
) ? (n
) : (nb_sectors
);
1795 return bs
->drv
->bdrv_is_allocated(bs
, sector_num
, nb_sectors
, pnum
);
1798 void bdrv_mon_event(const BlockDriverState
*bdrv
,
1799 BlockMonEventAction action
, int is_read
)
1802 const char *action_str
;
1805 case BDRV_ACTION_REPORT
:
1806 action_str
= "report";
1808 case BDRV_ACTION_IGNORE
:
1809 action_str
= "ignore";
1811 case BDRV_ACTION_STOP
:
1812 action_str
= "stop";
1818 data
= qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1821 is_read
? "read" : "write");
1822 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR
, data
);
1824 qobject_decref(data
);
1827 static void bdrv_print_dict(QObject
*obj
, void *opaque
)
1830 Monitor
*mon
= opaque
;
1832 bs_dict
= qobject_to_qdict(obj
);
1834 monitor_printf(mon
, "%s: removable=%d",
1835 qdict_get_str(bs_dict
, "device"),
1836 qdict_get_bool(bs_dict
, "removable"));
1838 if (qdict_get_bool(bs_dict
, "removable")) {
1839 monitor_printf(mon
, " locked=%d", qdict_get_bool(bs_dict
, "locked"));
1840 monitor_printf(mon
, " tray-open=%d",
1841 qdict_get_bool(bs_dict
, "tray-open"));
1844 if (qdict_haskey(bs_dict
, "io-status")) {
1845 monitor_printf(mon
, " io-status=%s", qdict_get_str(bs_dict
, "io-status"));
1848 if (qdict_haskey(bs_dict
, "inserted")) {
1849 QDict
*qdict
= qobject_to_qdict(qdict_get(bs_dict
, "inserted"));
1851 monitor_printf(mon
, " file=");
1852 monitor_print_filename(mon
, qdict_get_str(qdict
, "file"));
1853 if (qdict_haskey(qdict
, "backing_file")) {
1854 monitor_printf(mon
, " backing_file=");
1855 monitor_print_filename(mon
, qdict_get_str(qdict
, "backing_file"));
1857 monitor_printf(mon
, " ro=%d drv=%s encrypted=%d",
1858 qdict_get_bool(qdict
, "ro"),
1859 qdict_get_str(qdict
, "drv"),
1860 qdict_get_bool(qdict
, "encrypted"));
1862 monitor_printf(mon
, " [not inserted]");
1865 monitor_printf(mon
, "\n");
1868 void bdrv_info_print(Monitor
*mon
, const QObject
*data
)
1870 qlist_iter(qobject_to_qlist(data
), bdrv_print_dict
, mon
);
1873 static const char *const io_status_name
[BDRV_IOS_MAX
] = {
1874 [BDRV_IOS_OK
] = "ok",
1875 [BDRV_IOS_FAILED
] = "failed",
1876 [BDRV_IOS_ENOSPC
] = "nospace",
1879 void bdrv_info(Monitor
*mon
, QObject
**ret_data
)
1882 BlockDriverState
*bs
;
1884 bs_list
= qlist_new();
1886 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1890 bs_obj
= qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1891 "'removable': %i, 'locked': %i }",
1893 bdrv_dev_has_removable_media(bs
),
1894 bdrv_dev_is_medium_locked(bs
));
1895 bs_dict
= qobject_to_qdict(bs_obj
);
1897 if (bdrv_dev_has_removable_media(bs
)) {
1898 qdict_put(bs_dict
, "tray-open",
1899 qbool_from_int(bdrv_dev_is_tray_open(bs
)));
1902 if (bdrv_iostatus_is_enabled(bs
)) {
1903 qdict_put(bs_dict
, "io-status",
1904 qstring_from_str(io_status_name
[bs
->iostatus
]));
1910 obj
= qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1911 "'encrypted': %i }",
1912 bs
->filename
, bs
->read_only
,
1913 bs
->drv
->format_name
,
1914 bdrv_is_encrypted(bs
));
1915 if (bs
->backing_file
[0] != '\0') {
1916 QDict
*qdict
= qobject_to_qdict(obj
);
1917 qdict_put(qdict
, "backing_file",
1918 qstring_from_str(bs
->backing_file
));
1921 qdict_put_obj(bs_dict
, "inserted", obj
);
1923 qlist_append_obj(bs_list
, bs_obj
);
1926 *ret_data
= QOBJECT(bs_list
);
1929 static void bdrv_stats_iter(QObject
*data
, void *opaque
)
1932 Monitor
*mon
= opaque
;
1934 qdict
= qobject_to_qdict(data
);
1935 monitor_printf(mon
, "%s:", qdict_get_str(qdict
, "device"));
1937 qdict
= qobject_to_qdict(qdict_get(qdict
, "stats"));
1938 monitor_printf(mon
, " rd_bytes=%" PRId64
1939 " wr_bytes=%" PRId64
1940 " rd_operations=%" PRId64
1941 " wr_operations=%" PRId64
1942 " flush_operations=%" PRId64
1943 " wr_total_time_ns=%" PRId64
1944 " rd_total_time_ns=%" PRId64
1945 " flush_total_time_ns=%" PRId64
1947 qdict_get_int(qdict
, "rd_bytes"),
1948 qdict_get_int(qdict
, "wr_bytes"),
1949 qdict_get_int(qdict
, "rd_operations"),
1950 qdict_get_int(qdict
, "wr_operations"),
1951 qdict_get_int(qdict
, "flush_operations"),
1952 qdict_get_int(qdict
, "wr_total_time_ns"),
1953 qdict_get_int(qdict
, "rd_total_time_ns"),
1954 qdict_get_int(qdict
, "flush_total_time_ns"));
1957 void bdrv_stats_print(Monitor
*mon
, const QObject
*data
)
1959 qlist_iter(qobject_to_qlist(data
), bdrv_stats_iter
, mon
);
1962 static QObject
* bdrv_info_stats_bs(BlockDriverState
*bs
)
1967 res
= qobject_from_jsonf("{ 'stats': {"
1968 "'rd_bytes': %" PRId64
","
1969 "'wr_bytes': %" PRId64
","
1970 "'rd_operations': %" PRId64
","
1971 "'wr_operations': %" PRId64
","
1972 "'wr_highest_offset': %" PRId64
","
1973 "'flush_operations': %" PRId64
","
1974 "'wr_total_time_ns': %" PRId64
","
1975 "'rd_total_time_ns': %" PRId64
","
1976 "'flush_total_time_ns': %" PRId64
1978 bs
->nr_bytes
[BDRV_ACCT_READ
],
1979 bs
->nr_bytes
[BDRV_ACCT_WRITE
],
1980 bs
->nr_ops
[BDRV_ACCT_READ
],
1981 bs
->nr_ops
[BDRV_ACCT_WRITE
],
1982 bs
->wr_highest_sector
*
1983 (uint64_t)BDRV_SECTOR_SIZE
,
1984 bs
->nr_ops
[BDRV_ACCT_FLUSH
],
1985 bs
->total_time_ns
[BDRV_ACCT_WRITE
],
1986 bs
->total_time_ns
[BDRV_ACCT_READ
],
1987 bs
->total_time_ns
[BDRV_ACCT_FLUSH
]);
1988 dict
= qobject_to_qdict(res
);
1990 if (*bs
->device_name
) {
1991 qdict_put(dict
, "device", qstring_from_str(bs
->device_name
));
1995 QObject
*parent
= bdrv_info_stats_bs(bs
->file
);
1996 qdict_put_obj(dict
, "parent", parent
);
2002 void bdrv_info_stats(Monitor
*mon
, QObject
**ret_data
)
2006 BlockDriverState
*bs
;
2008 devices
= qlist_new();
2010 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2011 obj
= bdrv_info_stats_bs(bs
);
2012 qlist_append_obj(devices
, obj
);
2015 *ret_data
= QOBJECT(devices
);
2018 const char *bdrv_get_encrypted_filename(BlockDriverState
*bs
)
2020 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
2021 return bs
->backing_file
;
2022 else if (bs
->encrypted
)
2023 return bs
->filename
;
2028 void bdrv_get_backing_filename(BlockDriverState
*bs
,
2029 char *filename
, int filename_size
)
2031 if (!bs
->backing_file
) {
2032 pstrcpy(filename
, filename_size
, "");
2034 pstrcpy(filename
, filename_size
, bs
->backing_file
);
2038 int bdrv_write_compressed(BlockDriverState
*bs
, int64_t sector_num
,
2039 const uint8_t *buf
, int nb_sectors
)
2041 BlockDriver
*drv
= bs
->drv
;
2044 if (!drv
->bdrv_write_compressed
)
2046 if (bdrv_check_request(bs
, sector_num
, nb_sectors
))
2049 if (bs
->dirty_bitmap
) {
2050 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
2053 return drv
->bdrv_write_compressed(bs
, sector_num
, buf
, nb_sectors
);
2056 int bdrv_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
2058 BlockDriver
*drv
= bs
->drv
;
2061 if (!drv
->bdrv_get_info
)
2063 memset(bdi
, 0, sizeof(*bdi
));
2064 return drv
->bdrv_get_info(bs
, bdi
);
2067 int bdrv_save_vmstate(BlockDriverState
*bs
, const uint8_t *buf
,
2068 int64_t pos
, int size
)
2070 BlockDriver
*drv
= bs
->drv
;
2073 if (drv
->bdrv_save_vmstate
)
2074 return drv
->bdrv_save_vmstate(bs
, buf
, pos
, size
);
2076 return bdrv_save_vmstate(bs
->file
, buf
, pos
, size
);
2080 int bdrv_load_vmstate(BlockDriverState
*bs
, uint8_t *buf
,
2081 int64_t pos
, int size
)
2083 BlockDriver
*drv
= bs
->drv
;
2086 if (drv
->bdrv_load_vmstate
)
2087 return drv
->bdrv_load_vmstate(bs
, buf
, pos
, size
);
2089 return bdrv_load_vmstate(bs
->file
, buf
, pos
, size
);
2093 void bdrv_debug_event(BlockDriverState
*bs
, BlkDebugEvent event
)
2095 BlockDriver
*drv
= bs
->drv
;
2097 if (!drv
|| !drv
->bdrv_debug_event
) {
2101 return drv
->bdrv_debug_event(bs
, event
);
2105 /**************************************************************/
2106 /* handling of snapshots */
2108 int bdrv_can_snapshot(BlockDriverState
*bs
)
2110 BlockDriver
*drv
= bs
->drv
;
2111 if (!drv
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
2115 if (!drv
->bdrv_snapshot_create
) {
2116 if (bs
->file
!= NULL
) {
2117 return bdrv_can_snapshot(bs
->file
);
2125 int bdrv_is_snapshot(BlockDriverState
*bs
)
2127 return !!(bs
->open_flags
& BDRV_O_SNAPSHOT
);
2130 BlockDriverState
*bdrv_snapshots(void)
2132 BlockDriverState
*bs
;
2135 return bs_snapshots
;
2139 while ((bs
= bdrv_next(bs
))) {
2140 if (bdrv_can_snapshot(bs
)) {
2148 int bdrv_snapshot_create(BlockDriverState
*bs
,
2149 QEMUSnapshotInfo
*sn_info
)
2151 BlockDriver
*drv
= bs
->drv
;
2154 if (drv
->bdrv_snapshot_create
)
2155 return drv
->bdrv_snapshot_create(bs
, sn_info
);
2157 return bdrv_snapshot_create(bs
->file
, sn_info
);
2161 int bdrv_snapshot_goto(BlockDriverState
*bs
,
2162 const char *snapshot_id
)
2164 BlockDriver
*drv
= bs
->drv
;
2169 if (drv
->bdrv_snapshot_goto
)
2170 return drv
->bdrv_snapshot_goto(bs
, snapshot_id
);
2173 drv
->bdrv_close(bs
);
2174 ret
= bdrv_snapshot_goto(bs
->file
, snapshot_id
);
2175 open_ret
= drv
->bdrv_open(bs
, bs
->open_flags
);
2177 bdrv_delete(bs
->file
);
2187 int bdrv_snapshot_delete(BlockDriverState
*bs
, const char *snapshot_id
)
2189 BlockDriver
*drv
= bs
->drv
;
2192 if (drv
->bdrv_snapshot_delete
)
2193 return drv
->bdrv_snapshot_delete(bs
, snapshot_id
);
2195 return bdrv_snapshot_delete(bs
->file
, snapshot_id
);
2199 int bdrv_snapshot_list(BlockDriverState
*bs
,
2200 QEMUSnapshotInfo
**psn_info
)
2202 BlockDriver
*drv
= bs
->drv
;
2205 if (drv
->bdrv_snapshot_list
)
2206 return drv
->bdrv_snapshot_list(bs
, psn_info
);
2208 return bdrv_snapshot_list(bs
->file
, psn_info
);
2212 int bdrv_snapshot_load_tmp(BlockDriverState
*bs
,
2213 const char *snapshot_name
)
2215 BlockDriver
*drv
= bs
->drv
;
2219 if (!bs
->read_only
) {
2222 if (drv
->bdrv_snapshot_load_tmp
) {
2223 return drv
->bdrv_snapshot_load_tmp(bs
, snapshot_name
);
2228 #define NB_SUFFIXES 4
2230 char *get_human_readable_size(char *buf
, int buf_size
, int64_t size
)
2232 static const char suffixes
[NB_SUFFIXES
] = "KMGT";
2237 snprintf(buf
, buf_size
, "%" PRId64
, size
);
2240 for(i
= 0; i
< NB_SUFFIXES
; i
++) {
2241 if (size
< (10 * base
)) {
2242 snprintf(buf
, buf_size
, "%0.1f%c",
2243 (double)size
/ base
,
2246 } else if (size
< (1000 * base
) || i
== (NB_SUFFIXES
- 1)) {
2247 snprintf(buf
, buf_size
, "%" PRId64
"%c",
2248 ((size
+ (base
>> 1)) / base
),
2258 char *bdrv_snapshot_dump(char *buf
, int buf_size
, QEMUSnapshotInfo
*sn
)
2260 char buf1
[128], date_buf
[128], clock_buf
[128];
2270 snprintf(buf
, buf_size
,
2271 "%-10s%-20s%7s%20s%15s",
2272 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2276 ptm
= localtime(&ti
);
2277 strftime(date_buf
, sizeof(date_buf
),
2278 "%Y-%m-%d %H:%M:%S", ptm
);
2280 localtime_r(&ti
, &tm
);
2281 strftime(date_buf
, sizeof(date_buf
),
2282 "%Y-%m-%d %H:%M:%S", &tm
);
2284 secs
= sn
->vm_clock_nsec
/ 1000000000;
2285 snprintf(clock_buf
, sizeof(clock_buf
),
2286 "%02d:%02d:%02d.%03d",
2288 (int)((secs
/ 60) % 60),
2290 (int)((sn
->vm_clock_nsec
/ 1000000) % 1000));
2291 snprintf(buf
, buf_size
,
2292 "%-10s%-20s%7s%20s%15s",
2293 sn
->id_str
, sn
->name
,
2294 get_human_readable_size(buf1
, sizeof(buf1
), sn
->vm_state_size
),
2301 /**************************************************************/
2304 BlockDriverAIOCB
*bdrv_aio_readv(BlockDriverState
*bs
, int64_t sector_num
,
2305 QEMUIOVector
*qiov
, int nb_sectors
,
2306 BlockDriverCompletionFunc
*cb
, void *opaque
)
2308 trace_bdrv_aio_readv(bs
, sector_num
, nb_sectors
, opaque
);
2310 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
2314 BlockDriverAIOCB
*bdrv_aio_writev(BlockDriverState
*bs
, int64_t sector_num
,
2315 QEMUIOVector
*qiov
, int nb_sectors
,
2316 BlockDriverCompletionFunc
*cb
, void *opaque
)
2318 trace_bdrv_aio_writev(bs
, sector_num
, nb_sectors
, opaque
);
2320 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
2325 typedef struct MultiwriteCB
{
2330 BlockDriverCompletionFunc
*cb
;
2332 QEMUIOVector
*free_qiov
;
2337 static void multiwrite_user_cb(MultiwriteCB
*mcb
)
2341 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
2342 mcb
->callbacks
[i
].cb(mcb
->callbacks
[i
].opaque
, mcb
->error
);
2343 if (mcb
->callbacks
[i
].free_qiov
) {
2344 qemu_iovec_destroy(mcb
->callbacks
[i
].free_qiov
);
2346 g_free(mcb
->callbacks
[i
].free_qiov
);
2347 qemu_vfree(mcb
->callbacks
[i
].free_buf
);
2351 static void multiwrite_cb(void *opaque
, int ret
)
2353 MultiwriteCB
*mcb
= opaque
;
2355 trace_multiwrite_cb(mcb
, ret
);
2357 if (ret
< 0 && !mcb
->error
) {
2361 mcb
->num_requests
--;
2362 if (mcb
->num_requests
== 0) {
2363 multiwrite_user_cb(mcb
);
2368 static int multiwrite_req_compare(const void *a
, const void *b
)
2370 const BlockRequest
*req1
= a
, *req2
= b
;
2373 * Note that we can't simply subtract req2->sector from req1->sector
2374 * here as that could overflow the return value.
2376 if (req1
->sector
> req2
->sector
) {
2378 } else if (req1
->sector
< req2
->sector
) {
2386 * Takes a bunch of requests and tries to merge them. Returns the number of
2387 * requests that remain after merging.
2389 static int multiwrite_merge(BlockDriverState
*bs
, BlockRequest
*reqs
,
2390 int num_reqs
, MultiwriteCB
*mcb
)
2394 // Sort requests by start sector
2395 qsort(reqs
, num_reqs
, sizeof(*reqs
), &multiwrite_req_compare
);
2397 // Check if adjacent requests touch the same clusters. If so, combine them,
2398 // filling up gaps with zero sectors.
2400 for (i
= 1; i
< num_reqs
; i
++) {
2402 int64_t oldreq_last
= reqs
[outidx
].sector
+ reqs
[outidx
].nb_sectors
;
2404 // This handles the cases that are valid for all block drivers, namely
2405 // exactly sequential writes and overlapping writes.
2406 if (reqs
[i
].sector
<= oldreq_last
) {
2410 // The block driver may decide that it makes sense to combine requests
2411 // even if there is a gap of some sectors between them. In this case,
2412 // the gap is filled with zeros (therefore only applicable for yet
2413 // unused space in format like qcow2).
2414 if (!merge
&& bs
->drv
->bdrv_merge_requests
) {
2415 merge
= bs
->drv
->bdrv_merge_requests(bs
, &reqs
[outidx
], &reqs
[i
]);
2418 if (reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1 > IOV_MAX
) {
2424 QEMUIOVector
*qiov
= g_malloc0(sizeof(*qiov
));
2425 qemu_iovec_init(qiov
,
2426 reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1);
2428 // Add the first request to the merged one. If the requests are
2429 // overlapping, drop the last sectors of the first request.
2430 size
= (reqs
[i
].sector
- reqs
[outidx
].sector
) << 9;
2431 qemu_iovec_concat(qiov
, reqs
[outidx
].qiov
, size
);
2433 // We might need to add some zeros between the two requests
2434 if (reqs
[i
].sector
> oldreq_last
) {
2435 size_t zero_bytes
= (reqs
[i
].sector
- oldreq_last
) << 9;
2436 uint8_t *buf
= qemu_blockalign(bs
, zero_bytes
);
2437 memset(buf
, 0, zero_bytes
);
2438 qemu_iovec_add(qiov
, buf
, zero_bytes
);
2439 mcb
->callbacks
[i
].free_buf
= buf
;
2442 // Add the second request
2443 qemu_iovec_concat(qiov
, reqs
[i
].qiov
, reqs
[i
].qiov
->size
);
2445 reqs
[outidx
].nb_sectors
= qiov
->size
>> 9;
2446 reqs
[outidx
].qiov
= qiov
;
2448 mcb
->callbacks
[i
].free_qiov
= reqs
[outidx
].qiov
;
2451 reqs
[outidx
].sector
= reqs
[i
].sector
;
2452 reqs
[outidx
].nb_sectors
= reqs
[i
].nb_sectors
;
2453 reqs
[outidx
].qiov
= reqs
[i
].qiov
;
2461 * Submit multiple AIO write requests at once.
2463 * On success, the function returns 0 and all requests in the reqs array have
2464 * been submitted. In error case this function returns -1, and any of the
2465 * requests may or may not be submitted yet. In particular, this means that the
2466 * callback will be called for some of the requests, for others it won't. The
2467 * caller must check the error field of the BlockRequest to wait for the right
2468 * callbacks (if error != 0, no callback will be called).
2470 * The implementation may modify the contents of the reqs array, e.g. to merge
2471 * requests. However, the fields opaque and error are left unmodified as they
2472 * are used to signal failure for a single request to the caller.
2474 int bdrv_aio_multiwrite(BlockDriverState
*bs
, BlockRequest
*reqs
, int num_reqs
)
2476 BlockDriverAIOCB
*acb
;
2480 /* don't submit writes if we don't have a medium */
2481 if (bs
->drv
== NULL
) {
2482 for (i
= 0; i
< num_reqs
; i
++) {
2483 reqs
[i
].error
= -ENOMEDIUM
;
2488 if (num_reqs
== 0) {
2492 // Create MultiwriteCB structure
2493 mcb
= g_malloc0(sizeof(*mcb
) + num_reqs
* sizeof(*mcb
->callbacks
));
2494 mcb
->num_requests
= 0;
2495 mcb
->num_callbacks
= num_reqs
;
2497 for (i
= 0; i
< num_reqs
; i
++) {
2498 mcb
->callbacks
[i
].cb
= reqs
[i
].cb
;
2499 mcb
->callbacks
[i
].opaque
= reqs
[i
].opaque
;
2502 // Check for mergable requests
2503 num_reqs
= multiwrite_merge(bs
, reqs
, num_reqs
, mcb
);
2505 trace_bdrv_aio_multiwrite(mcb
, mcb
->num_callbacks
, num_reqs
);
2508 * Run the aio requests. As soon as one request can't be submitted
2509 * successfully, fail all requests that are not yet submitted (we must
2510 * return failure for all requests anyway)
2512 * num_requests cannot be set to the right value immediately: If
2513 * bdrv_aio_writev fails for some request, num_requests would be too high
2514 * and therefore multiwrite_cb() would never recognize the multiwrite
2515 * request as completed. We also cannot use the loop variable i to set it
2516 * when the first request fails because the callback may already have been
2517 * called for previously submitted requests. Thus, num_requests must be
2518 * incremented for each request that is submitted.
2520 * The problem that callbacks may be called early also means that we need
2521 * to take care that num_requests doesn't become 0 before all requests are
2522 * submitted - multiwrite_cb() would consider the multiwrite request
2523 * completed. A dummy request that is "completed" by a manual call to
2524 * multiwrite_cb() takes care of this.
2526 mcb
->num_requests
= 1;
2528 // Run the aio requests
2529 for (i
= 0; i
< num_reqs
; i
++) {
2530 mcb
->num_requests
++;
2531 acb
= bdrv_aio_writev(bs
, reqs
[i
].sector
, reqs
[i
].qiov
,
2532 reqs
[i
].nb_sectors
, multiwrite_cb
, mcb
);
2535 // We can only fail the whole thing if no request has been
2536 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2537 // complete and report the error in the callback.
2539 trace_bdrv_aio_multiwrite_earlyfail(mcb
);
2542 trace_bdrv_aio_multiwrite_latefail(mcb
, i
);
2543 multiwrite_cb(mcb
, -EIO
);
2549 /* Complete the dummy request */
2550 multiwrite_cb(mcb
, 0);
2555 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
2556 reqs
[i
].error
= -EIO
;
2562 void bdrv_aio_cancel(BlockDriverAIOCB
*acb
)
2564 acb
->pool
->cancel(acb
);
2568 /**************************************************************/
2569 /* async block device emulation */
2571 typedef struct BlockDriverAIOCBSync
{
2572 BlockDriverAIOCB common
;
2575 /* vector translation state */
2579 } BlockDriverAIOCBSync
;
2581 static void bdrv_aio_cancel_em(BlockDriverAIOCB
*blockacb
)
2583 BlockDriverAIOCBSync
*acb
=
2584 container_of(blockacb
, BlockDriverAIOCBSync
, common
);
2585 qemu_bh_delete(acb
->bh
);
2587 qemu_aio_release(acb
);
2590 static AIOPool bdrv_em_aio_pool
= {
2591 .aiocb_size
= sizeof(BlockDriverAIOCBSync
),
2592 .cancel
= bdrv_aio_cancel_em
,
2595 static void bdrv_aio_bh_cb(void *opaque
)
2597 BlockDriverAIOCBSync
*acb
= opaque
;
2600 qemu_iovec_from_buffer(acb
->qiov
, acb
->bounce
, acb
->qiov
->size
);
2601 qemu_vfree(acb
->bounce
);
2602 acb
->common
.cb(acb
->common
.opaque
, acb
->ret
);
2603 qemu_bh_delete(acb
->bh
);
2605 qemu_aio_release(acb
);
2608 static BlockDriverAIOCB
*bdrv_aio_rw_vector(BlockDriverState
*bs
,
2612 BlockDriverCompletionFunc
*cb
,
2617 BlockDriverAIOCBSync
*acb
;
2619 acb
= qemu_aio_get(&bdrv_em_aio_pool
, bs
, cb
, opaque
);
2620 acb
->is_write
= is_write
;
2622 acb
->bounce
= qemu_blockalign(bs
, qiov
->size
);
2625 acb
->bh
= qemu_bh_new(bdrv_aio_bh_cb
, acb
);
2628 qemu_iovec_to_buffer(acb
->qiov
, acb
->bounce
);
2629 acb
->ret
= bs
->drv
->bdrv_write(bs
, sector_num
, acb
->bounce
, nb_sectors
);
2631 acb
->ret
= bs
->drv
->bdrv_read(bs
, sector_num
, acb
->bounce
, nb_sectors
);
2634 qemu_bh_schedule(acb
->bh
);
2636 return &acb
->common
;
2639 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
2640 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
2641 BlockDriverCompletionFunc
*cb
, void *opaque
)
2643 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 0);
2646 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
2647 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
2648 BlockDriverCompletionFunc
*cb
, void *opaque
)
2650 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 1);
2654 typedef struct BlockDriverAIOCBCoroutine
{
2655 BlockDriverAIOCB common
;
2659 } BlockDriverAIOCBCoroutine
;
2661 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB
*blockacb
)
2666 static AIOPool bdrv_em_co_aio_pool
= {
2667 .aiocb_size
= sizeof(BlockDriverAIOCBCoroutine
),
2668 .cancel
= bdrv_aio_co_cancel_em
,
2671 static void bdrv_co_em_bh(void *opaque
)
2673 BlockDriverAIOCBCoroutine
*acb
= opaque
;
2675 acb
->common
.cb(acb
->common
.opaque
, acb
->req
.error
);
2676 qemu_bh_delete(acb
->bh
);
2677 qemu_aio_release(acb
);
2680 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2681 static void coroutine_fn
bdrv_co_do_rw(void *opaque
)
2683 BlockDriverAIOCBCoroutine
*acb
= opaque
;
2684 BlockDriverState
*bs
= acb
->common
.bs
;
2686 if (!acb
->is_write
) {
2687 acb
->req
.error
= bdrv_co_do_readv(bs
, acb
->req
.sector
,
2688 acb
->req
.nb_sectors
, acb
->req
.qiov
);
2690 acb
->req
.error
= bdrv_co_do_writev(bs
, acb
->req
.sector
,
2691 acb
->req
.nb_sectors
, acb
->req
.qiov
);
2694 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
2695 qemu_bh_schedule(acb
->bh
);
2698 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
2702 BlockDriverCompletionFunc
*cb
,
2707 BlockDriverAIOCBCoroutine
*acb
;
2709 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
2710 acb
->req
.sector
= sector_num
;
2711 acb
->req
.nb_sectors
= nb_sectors
;
2712 acb
->req
.qiov
= qiov
;
2713 acb
->is_write
= is_write
;
2715 co
= qemu_coroutine_create(bdrv_co_do_rw
);
2716 qemu_coroutine_enter(co
, acb
);
2718 return &acb
->common
;
2721 static void coroutine_fn
bdrv_aio_flush_co_entry(void *opaque
)
2723 BlockDriverAIOCBCoroutine
*acb
= opaque
;
2724 BlockDriverState
*bs
= acb
->common
.bs
;
2726 acb
->req
.error
= bdrv_co_flush(bs
);
2727 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
2728 qemu_bh_schedule(acb
->bh
);
2731 BlockDriverAIOCB
*bdrv_aio_flush(BlockDriverState
*bs
,
2732 BlockDriverCompletionFunc
*cb
, void *opaque
)
2734 trace_bdrv_aio_flush(bs
, opaque
);
2737 BlockDriverAIOCBCoroutine
*acb
;
2739 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
2740 co
= qemu_coroutine_create(bdrv_aio_flush_co_entry
);
2741 qemu_coroutine_enter(co
, acb
);
2743 return &acb
->common
;
2746 static void coroutine_fn
bdrv_aio_discard_co_entry(void *opaque
)
2748 BlockDriverAIOCBCoroutine
*acb
= opaque
;
2749 BlockDriverState
*bs
= acb
->common
.bs
;
2751 acb
->req
.error
= bdrv_co_discard(bs
, acb
->req
.sector
, acb
->req
.nb_sectors
);
2752 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
2753 qemu_bh_schedule(acb
->bh
);
2756 BlockDriverAIOCB
*bdrv_aio_discard(BlockDriverState
*bs
,
2757 int64_t sector_num
, int nb_sectors
,
2758 BlockDriverCompletionFunc
*cb
, void *opaque
)
2761 BlockDriverAIOCBCoroutine
*acb
;
2763 trace_bdrv_aio_discard(bs
, sector_num
, nb_sectors
, opaque
);
2765 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
2766 acb
->req
.sector
= sector_num
;
2767 acb
->req
.nb_sectors
= nb_sectors
;
2768 co
= qemu_coroutine_create(bdrv_aio_discard_co_entry
);
2769 qemu_coroutine_enter(co
, acb
);
2771 return &acb
->common
;
2774 void bdrv_init(void)
2776 module_call_init(MODULE_INIT_BLOCK
);
2779 void bdrv_init_with_whitelist(void)
2781 use_bdrv_whitelist
= 1;
2785 void *qemu_aio_get(AIOPool
*pool
, BlockDriverState
*bs
,
2786 BlockDriverCompletionFunc
*cb
, void *opaque
)
2788 BlockDriverAIOCB
*acb
;
2790 if (pool
->free_aiocb
) {
2791 acb
= pool
->free_aiocb
;
2792 pool
->free_aiocb
= acb
->next
;
2794 acb
= g_malloc0(pool
->aiocb_size
);
2799 acb
->opaque
= opaque
;
2803 void qemu_aio_release(void *p
)
2805 BlockDriverAIOCB
*acb
= (BlockDriverAIOCB
*)p
;
2806 AIOPool
*pool
= acb
->pool
;
2807 acb
->next
= pool
->free_aiocb
;
2808 pool
->free_aiocb
= acb
;
2811 /**************************************************************/
2812 /* Coroutine block device emulation */
2814 typedef struct CoroutineIOCompletion
{
2815 Coroutine
*coroutine
;
2817 } CoroutineIOCompletion
;
2819 static void bdrv_co_io_em_complete(void *opaque
, int ret
)
2821 CoroutineIOCompletion
*co
= opaque
;
2824 qemu_coroutine_enter(co
->coroutine
, NULL
);
2827 static int coroutine_fn
bdrv_co_io_em(BlockDriverState
*bs
, int64_t sector_num
,
2828 int nb_sectors
, QEMUIOVector
*iov
,
2831 CoroutineIOCompletion co
= {
2832 .coroutine
= qemu_coroutine_self(),
2834 BlockDriverAIOCB
*acb
;
2837 acb
= bs
->drv
->bdrv_aio_writev(bs
, sector_num
, iov
, nb_sectors
,
2838 bdrv_co_io_em_complete
, &co
);
2840 acb
= bs
->drv
->bdrv_aio_readv(bs
, sector_num
, iov
, nb_sectors
,
2841 bdrv_co_io_em_complete
, &co
);
2844 trace_bdrv_co_io_em(bs
, sector_num
, nb_sectors
, is_write
, acb
);
2848 qemu_coroutine_yield();
2853 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
2854 int64_t sector_num
, int nb_sectors
,
2857 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, false);
2860 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
2861 int64_t sector_num
, int nb_sectors
,
2864 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, true);
2867 static void coroutine_fn
bdrv_flush_co_entry(void *opaque
)
2869 RwCo
*rwco
= opaque
;
2871 rwco
->ret
= bdrv_co_flush(rwco
->bs
);
2874 int coroutine_fn
bdrv_co_flush(BlockDriverState
*bs
)
2876 if (bs
->open_flags
& BDRV_O_NO_FLUSH
) {
2878 } else if (!bs
->drv
) {
2880 } else if (bs
->drv
->bdrv_co_flush
) {
2881 return bs
->drv
->bdrv_co_flush(bs
);
2882 } else if (bs
->drv
->bdrv_aio_flush
) {
2883 BlockDriverAIOCB
*acb
;
2884 CoroutineIOCompletion co
= {
2885 .coroutine
= qemu_coroutine_self(),
2888 acb
= bs
->drv
->bdrv_aio_flush(bs
, bdrv_co_io_em_complete
, &co
);
2892 qemu_coroutine_yield();
2897 * Some block drivers always operate in either writethrough or unsafe
2898 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
2899 * know how the server works (because the behaviour is hardcoded or
2900 * depends on server-side configuration), so we can't ensure that
2901 * everything is safe on disk. Returning an error doesn't work because
2902 * that would break guests even if the server operates in writethrough
2905 * Let's hope the user knows what he's doing.
2911 int bdrv_flush(BlockDriverState
*bs
)
2919 if (qemu_in_coroutine()) {
2920 /* Fast-path if already in coroutine context */
2921 bdrv_flush_co_entry(&rwco
);
2923 co
= qemu_coroutine_create(bdrv_flush_co_entry
);
2924 qemu_coroutine_enter(co
, &rwco
);
2925 while (rwco
.ret
== NOT_DONE
) {
2933 static void coroutine_fn
bdrv_discard_co_entry(void *opaque
)
2935 RwCo
*rwco
= opaque
;
2937 rwco
->ret
= bdrv_co_discard(rwco
->bs
, rwco
->sector_num
, rwco
->nb_sectors
);
2940 int coroutine_fn
bdrv_co_discard(BlockDriverState
*bs
, int64_t sector_num
,
2945 } else if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
2947 } else if (bs
->read_only
) {
2949 } else if (bs
->drv
->bdrv_co_discard
) {
2950 return bs
->drv
->bdrv_co_discard(bs
, sector_num
, nb_sectors
);
2951 } else if (bs
->drv
->bdrv_aio_discard
) {
2952 BlockDriverAIOCB
*acb
;
2953 CoroutineIOCompletion co
= {
2954 .coroutine
= qemu_coroutine_self(),
2957 acb
= bs
->drv
->bdrv_aio_discard(bs
, sector_num
, nb_sectors
,
2958 bdrv_co_io_em_complete
, &co
);
2962 qemu_coroutine_yield();
2970 int bdrv_discard(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
)
2975 .sector_num
= sector_num
,
2976 .nb_sectors
= nb_sectors
,
2980 if (qemu_in_coroutine()) {
2981 /* Fast-path if already in coroutine context */
2982 bdrv_discard_co_entry(&rwco
);
2984 co
= qemu_coroutine_create(bdrv_discard_co_entry
);
2985 qemu_coroutine_enter(co
, &rwco
);
2986 while (rwco
.ret
== NOT_DONE
) {
2994 /**************************************************************/
2995 /* removable device support */
2998 * Return TRUE if the media is present
3000 int bdrv_is_inserted(BlockDriverState
*bs
)
3002 BlockDriver
*drv
= bs
->drv
;
3006 if (!drv
->bdrv_is_inserted
)
3008 return drv
->bdrv_is_inserted(bs
);
3012 * Return whether the media changed since the last call to this
3013 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3015 int bdrv_media_changed(BlockDriverState
*bs
)
3017 BlockDriver
*drv
= bs
->drv
;
3019 if (drv
&& drv
->bdrv_media_changed
) {
3020 return drv
->bdrv_media_changed(bs
);
3026 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3028 void bdrv_eject(BlockDriverState
*bs
, int eject_flag
)
3030 BlockDriver
*drv
= bs
->drv
;
3032 if (drv
&& drv
->bdrv_eject
) {
3033 drv
->bdrv_eject(bs
, eject_flag
);
3038 * Lock or unlock the media (if it is locked, the user won't be able
3039 * to eject it manually).
3041 void bdrv_lock_medium(BlockDriverState
*bs
, bool locked
)
3043 BlockDriver
*drv
= bs
->drv
;
3045 trace_bdrv_lock_medium(bs
, locked
);
3047 if (drv
&& drv
->bdrv_lock_medium
) {
3048 drv
->bdrv_lock_medium(bs
, locked
);
3052 /* needed for generic scsi interface */
3054 int bdrv_ioctl(BlockDriverState
*bs
, unsigned long int req
, void *buf
)
3056 BlockDriver
*drv
= bs
->drv
;
3058 if (drv
&& drv
->bdrv_ioctl
)
3059 return drv
->bdrv_ioctl(bs
, req
, buf
);
3063 BlockDriverAIOCB
*bdrv_aio_ioctl(BlockDriverState
*bs
,
3064 unsigned long int req
, void *buf
,
3065 BlockDriverCompletionFunc
*cb
, void *opaque
)
3067 BlockDriver
*drv
= bs
->drv
;
3069 if (drv
&& drv
->bdrv_aio_ioctl
)
3070 return drv
->bdrv_aio_ioctl(bs
, req
, buf
, cb
, opaque
);
3074 void bdrv_set_buffer_alignment(BlockDriverState
*bs
, int align
)
3076 bs
->buffer_alignment
= align
;
3079 void *qemu_blockalign(BlockDriverState
*bs
, size_t size
)
3081 return qemu_memalign((bs
&& bs
->buffer_alignment
) ? bs
->buffer_alignment
: 512, size
);
3084 void bdrv_set_dirty_tracking(BlockDriverState
*bs
, int enable
)
3086 int64_t bitmap_size
;
3088 bs
->dirty_count
= 0;
3090 if (!bs
->dirty_bitmap
) {
3091 bitmap_size
= (bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
) +
3092 BDRV_SECTORS_PER_DIRTY_CHUNK
* 8 - 1;
3093 bitmap_size
/= BDRV_SECTORS_PER_DIRTY_CHUNK
* 8;
3095 bs
->dirty_bitmap
= g_malloc0(bitmap_size
);
3098 if (bs
->dirty_bitmap
) {
3099 g_free(bs
->dirty_bitmap
);
3100 bs
->dirty_bitmap
= NULL
;
3105 int bdrv_get_dirty(BlockDriverState
*bs
, int64_t sector
)
3107 int64_t chunk
= sector
/ (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK
;
3109 if (bs
->dirty_bitmap
&&
3110 (sector
<< BDRV_SECTOR_BITS
) < bdrv_getlength(bs
)) {
3111 return !!(bs
->dirty_bitmap
[chunk
/ (sizeof(unsigned long) * 8)] &
3112 (1UL << (chunk
% (sizeof(unsigned long) * 8))));
3118 void bdrv_reset_dirty(BlockDriverState
*bs
, int64_t cur_sector
,
3121 set_dirty_bitmap(bs
, cur_sector
, nr_sectors
, 0);
3124 int64_t bdrv_get_dirty_count(BlockDriverState
*bs
)
3126 return bs
->dirty_count
;
3129 void bdrv_set_in_use(BlockDriverState
*bs
, int in_use
)
3131 assert(bs
->in_use
!= in_use
);
3132 bs
->in_use
= in_use
;
3135 int bdrv_in_use(BlockDriverState
*bs
)
3140 void bdrv_iostatus_enable(BlockDriverState
*bs
)
3142 bs
->iostatus
= BDRV_IOS_OK
;
3145 /* The I/O status is only enabled if the drive explicitly
3146 * enables it _and_ the VM is configured to stop on errors */
3147 bool bdrv_iostatus_is_enabled(const BlockDriverState
*bs
)
3149 return (bs
->iostatus
!= BDRV_IOS_INVAL
&&
3150 (bs
->on_write_error
== BLOCK_ERR_STOP_ENOSPC
||
3151 bs
->on_write_error
== BLOCK_ERR_STOP_ANY
||
3152 bs
->on_read_error
== BLOCK_ERR_STOP_ANY
));
3155 void bdrv_iostatus_disable(BlockDriverState
*bs
)
3157 bs
->iostatus
= BDRV_IOS_INVAL
;
3160 void bdrv_iostatus_reset(BlockDriverState
*bs
)
3162 if (bdrv_iostatus_is_enabled(bs
)) {
3163 bs
->iostatus
= BDRV_IOS_OK
;
3167 /* XXX: Today this is set by device models because it makes the implementation
3168 quite simple. However, the block layer knows about the error, so it's
3169 possible to implement this without device models being involved */
3170 void bdrv_iostatus_set_err(BlockDriverState
*bs
, int error
)
3172 if (bdrv_iostatus_is_enabled(bs
) && bs
->iostatus
== BDRV_IOS_OK
) {
3174 bs
->iostatus
= error
== ENOSPC
? BDRV_IOS_ENOSPC
: BDRV_IOS_FAILED
;
3179 bdrv_acct_start(BlockDriverState
*bs
, BlockAcctCookie
*cookie
, int64_t bytes
,
3180 enum BlockAcctType type
)
3182 assert(type
< BDRV_MAX_IOTYPE
);
3184 cookie
->bytes
= bytes
;
3185 cookie
->start_time_ns
= get_clock();
3186 cookie
->type
= type
;
3190 bdrv_acct_done(BlockDriverState
*bs
, BlockAcctCookie
*cookie
)
3192 assert(cookie
->type
< BDRV_MAX_IOTYPE
);
3194 bs
->nr_bytes
[cookie
->type
] += cookie
->bytes
;
3195 bs
->nr_ops
[cookie
->type
]++;
3196 bs
->total_time_ns
[cookie
->type
] += get_clock() - cookie
->start_time_ns
;
3199 int bdrv_img_create(const char *filename
, const char *fmt
,
3200 const char *base_filename
, const char *base_fmt
,
3201 char *options
, uint64_t img_size
, int flags
)
3203 QEMUOptionParameter
*param
= NULL
, *create_options
= NULL
;
3204 QEMUOptionParameter
*backing_fmt
, *backing_file
, *size
;
3205 BlockDriverState
*bs
= NULL
;
3206 BlockDriver
*drv
, *proto_drv
;
3207 BlockDriver
*backing_drv
= NULL
;
3210 /* Find driver and parse its options */
3211 drv
= bdrv_find_format(fmt
);
3213 error_report("Unknown file format '%s'", fmt
);
3218 proto_drv
= bdrv_find_protocol(filename
);
3220 error_report("Unknown protocol '%s'", filename
);
3225 create_options
= append_option_parameters(create_options
,
3226 drv
->create_options
);
3227 create_options
= append_option_parameters(create_options
,
3228 proto_drv
->create_options
);
3230 /* Create parameter list with default values */
3231 param
= parse_option_parameters("", create_options
, param
);
3233 set_option_parameter_int(param
, BLOCK_OPT_SIZE
, img_size
);
3235 /* Parse -o options */
3237 param
= parse_option_parameters(options
, create_options
, param
);
3238 if (param
== NULL
) {
3239 error_report("Invalid options for file format '%s'.", fmt
);
3245 if (base_filename
) {
3246 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FILE
,
3248 error_report("Backing file not supported for file format '%s'",
3256 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FMT
, base_fmt
)) {
3257 error_report("Backing file format not supported for file "
3258 "format '%s'", fmt
);
3264 backing_file
= get_option_parameter(param
, BLOCK_OPT_BACKING_FILE
);
3265 if (backing_file
&& backing_file
->value
.s
) {
3266 if (!strcmp(filename
, backing_file
->value
.s
)) {
3267 error_report("Error: Trying to create an image with the "
3268 "same filename as the backing file");
3274 backing_fmt
= get_option_parameter(param
, BLOCK_OPT_BACKING_FMT
);
3275 if (backing_fmt
&& backing_fmt
->value
.s
) {
3276 backing_drv
= bdrv_find_format(backing_fmt
->value
.s
);
3278 error_report("Unknown backing file format '%s'",
3279 backing_fmt
->value
.s
);
3285 // The size for the image must always be specified, with one exception:
3286 // If we are using a backing file, we can obtain the size from there
3287 size
= get_option_parameter(param
, BLOCK_OPT_SIZE
);
3288 if (size
&& size
->value
.n
== -1) {
3289 if (backing_file
&& backing_file
->value
.s
) {
3295 ret
= bdrv_open(bs
, backing_file
->value
.s
, flags
, backing_drv
);
3297 error_report("Could not open '%s'", backing_file
->value
.s
);
3300 bdrv_get_geometry(bs
, &size
);
3303 snprintf(buf
, sizeof(buf
), "%" PRId64
, size
);
3304 set_option_parameter(param
, BLOCK_OPT_SIZE
, buf
);
3306 error_report("Image creation needs a size parameter");
3312 printf("Formatting '%s', fmt=%s ", filename
, fmt
);
3313 print_option_parameters(param
);
3316 ret
= bdrv_create(drv
, filename
, param
);
3319 if (ret
== -ENOTSUP
) {
3320 error_report("Formatting or formatting option not supported for "
3321 "file format '%s'", fmt
);
3322 } else if (ret
== -EFBIG
) {
3323 error_report("The image size is too large for file format '%s'",
3326 error_report("%s: error while creating %s: %s", filename
, fmt
,
3332 free_option_parameters(create_options
);
3333 free_option_parameters(param
);