2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
28 #include "block_int.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
35 #include <sys/types.h>
37 #include <sys/ioctl.h>
38 #include <sys/queue.h>
48 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
);
51 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
52 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
53 BlockDriverCompletionFunc
*cb
, void *opaque
);
54 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
55 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
56 BlockDriverCompletionFunc
*cb
, void *opaque
);
57 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
58 int64_t sector_num
, int nb_sectors
,
60 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
61 int64_t sector_num
, int nb_sectors
,
63 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
64 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
);
65 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
66 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
);
67 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
71 BlockDriverCompletionFunc
*cb
,
74 static void coroutine_fn
bdrv_co_do_rw(void *opaque
);
76 static QTAILQ_HEAD(, BlockDriverState
) bdrv_states
=
77 QTAILQ_HEAD_INITIALIZER(bdrv_states
);
79 static QLIST_HEAD(, BlockDriver
) bdrv_drivers
=
80 QLIST_HEAD_INITIALIZER(bdrv_drivers
);
82 /* The device to use for VM snapshots */
83 static BlockDriverState
*bs_snapshots
;
85 /* If non-zero, use only whitelisted block drivers */
86 static int use_bdrv_whitelist
;
89 static int is_windows_drive_prefix(const char *filename
)
91 return (((filename
[0] >= 'a' && filename
[0] <= 'z') ||
92 (filename
[0] >= 'A' && filename
[0] <= 'Z')) &&
96 int is_windows_drive(const char *filename
)
98 if (is_windows_drive_prefix(filename
) &&
101 if (strstart(filename
, "\\\\.\\", NULL
) ||
102 strstart(filename
, "//./", NULL
))
108 /* check if the path starts with "<protocol>:" */
109 static int path_has_protocol(const char *path
)
112 if (is_windows_drive(path
) ||
113 is_windows_drive_prefix(path
)) {
118 return strchr(path
, ':') != NULL
;
121 int path_is_absolute(const char *path
)
125 /* specific case for names like: "\\.\d:" */
126 if (*path
== '/' || *path
== '\\')
129 p
= strchr(path
, ':');
135 return (*p
== '/' || *p
== '\\');
141 /* if filename is absolute, just copy it to dest. Otherwise, build a
142 path to it by considering it is relative to base_path. URL are
144 void path_combine(char *dest
, int dest_size
,
145 const char *base_path
,
146 const char *filename
)
153 if (path_is_absolute(filename
)) {
154 pstrcpy(dest
, dest_size
, filename
);
156 p
= strchr(base_path
, ':');
161 p1
= strrchr(base_path
, '/');
165 p2
= strrchr(base_path
, '\\');
177 if (len
> dest_size
- 1)
179 memcpy(dest
, base_path
, len
);
181 pstrcat(dest
, dest_size
, filename
);
185 void bdrv_register(BlockDriver
*bdrv
)
187 /* Block drivers without coroutine functions need emulation */
188 if (!bdrv
->bdrv_co_readv
) {
189 bdrv
->bdrv_co_readv
= bdrv_co_readv_em
;
190 bdrv
->bdrv_co_writev
= bdrv_co_writev_em
;
192 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
193 * the block driver lacks aio we need to emulate that too.
195 if (!bdrv
->bdrv_aio_readv
) {
196 /* add AIO emulation layer */
197 bdrv
->bdrv_aio_readv
= bdrv_aio_readv_em
;
198 bdrv
->bdrv_aio_writev
= bdrv_aio_writev_em
;
202 QLIST_INSERT_HEAD(&bdrv_drivers
, bdrv
, list
);
205 /* create a new block device (by default it is empty) */
206 BlockDriverState
*bdrv_new(const char *device_name
)
208 BlockDriverState
*bs
;
210 bs
= g_malloc0(sizeof(BlockDriverState
));
211 pstrcpy(bs
->device_name
, sizeof(bs
->device_name
), device_name
);
212 if (device_name
[0] != '\0') {
213 QTAILQ_INSERT_TAIL(&bdrv_states
, bs
, list
);
215 bdrv_iostatus_disable(bs
);
219 BlockDriver
*bdrv_find_format(const char *format_name
)
222 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
223 if (!strcmp(drv1
->format_name
, format_name
)) {
230 static int bdrv_is_whitelisted(BlockDriver
*drv
)
232 static const char *whitelist
[] = {
233 CONFIG_BDRV_WHITELIST
238 return 1; /* no whitelist, anything goes */
240 for (p
= whitelist
; *p
; p
++) {
241 if (!strcmp(drv
->format_name
, *p
)) {
248 BlockDriver
*bdrv_find_whitelisted_format(const char *format_name
)
250 BlockDriver
*drv
= bdrv_find_format(format_name
);
251 return drv
&& bdrv_is_whitelisted(drv
) ? drv
: NULL
;
254 int bdrv_create(BlockDriver
*drv
, const char* filename
,
255 QEMUOptionParameter
*options
)
257 if (!drv
->bdrv_create
)
260 return drv
->bdrv_create(filename
, options
);
263 int bdrv_create_file(const char* filename
, QEMUOptionParameter
*options
)
267 drv
= bdrv_find_protocol(filename
);
272 return bdrv_create(drv
, filename
, options
);
276 void get_tmp_filename(char *filename
, int size
)
278 char temp_dir
[MAX_PATH
];
280 GetTempPath(MAX_PATH
, temp_dir
);
281 GetTempFileName(temp_dir
, "qem", 0, filename
);
284 void get_tmp_filename(char *filename
, int size
)
288 /* XXX: race condition possible */
289 tmpdir
= getenv("TMPDIR");
292 snprintf(filename
, size
, "%s/vl.XXXXXX", tmpdir
);
293 fd
= mkstemp(filename
);
299 * Detect host devices. By convention, /dev/cdrom[N] is always
300 * recognized as a host CDROM.
302 static BlockDriver
*find_hdev_driver(const char *filename
)
304 int score_max
= 0, score
;
305 BlockDriver
*drv
= NULL
, *d
;
307 QLIST_FOREACH(d
, &bdrv_drivers
, list
) {
308 if (d
->bdrv_probe_device
) {
309 score
= d
->bdrv_probe_device(filename
);
310 if (score
> score_max
) {
320 BlockDriver
*bdrv_find_protocol(const char *filename
)
327 /* TODO Drivers without bdrv_file_open must be specified explicitly */
330 * XXX(hch): we really should not let host device detection
331 * override an explicit protocol specification, but moving this
332 * later breaks access to device names with colons in them.
333 * Thanks to the brain-dead persistent naming schemes on udev-
334 * based Linux systems those actually are quite common.
336 drv1
= find_hdev_driver(filename
);
341 if (!path_has_protocol(filename
)) {
342 return bdrv_find_format("file");
344 p
= strchr(filename
, ':');
347 if (len
> sizeof(protocol
) - 1)
348 len
= sizeof(protocol
) - 1;
349 memcpy(protocol
, filename
, len
);
350 protocol
[len
] = '\0';
351 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
352 if (drv1
->protocol_name
&&
353 !strcmp(drv1
->protocol_name
, protocol
)) {
360 static int find_image_format(const char *filename
, BlockDriver
**pdrv
)
362 int ret
, score
, score_max
;
363 BlockDriver
*drv1
, *drv
;
365 BlockDriverState
*bs
;
367 ret
= bdrv_file_open(&bs
, filename
, 0);
373 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
374 if (bs
->sg
|| !bdrv_is_inserted(bs
)) {
376 drv
= bdrv_find_format("raw");
384 ret
= bdrv_pread(bs
, 0, buf
, sizeof(buf
));
393 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
394 if (drv1
->bdrv_probe
) {
395 score
= drv1
->bdrv_probe(buf
, ret
, filename
);
396 if (score
> score_max
) {
410 * Set the current 'total_sectors' value
412 static int refresh_total_sectors(BlockDriverState
*bs
, int64_t hint
)
414 BlockDriver
*drv
= bs
->drv
;
416 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
420 /* query actual device if possible, otherwise just trust the hint */
421 if (drv
->bdrv_getlength
) {
422 int64_t length
= drv
->bdrv_getlength(bs
);
426 hint
= length
>> BDRV_SECTOR_BITS
;
429 bs
->total_sectors
= hint
;
434 * Set open flags for a given cache mode
436 * Return 0 on success, -1 if the cache mode was invalid.
438 int bdrv_parse_cache_flags(const char *mode
, int *flags
)
440 *flags
&= ~BDRV_O_CACHE_MASK
;
442 if (!strcmp(mode
, "off") || !strcmp(mode
, "none")) {
443 *flags
|= BDRV_O_NOCACHE
| BDRV_O_CACHE_WB
;
444 } else if (!strcmp(mode
, "directsync")) {
445 *flags
|= BDRV_O_NOCACHE
;
446 } else if (!strcmp(mode
, "writeback")) {
447 *flags
|= BDRV_O_CACHE_WB
;
448 } else if (!strcmp(mode
, "unsafe")) {
449 *flags
|= BDRV_O_CACHE_WB
;
450 *flags
|= BDRV_O_NO_FLUSH
;
451 } else if (!strcmp(mode
, "writethrough")) {
452 /* this is the default */
461 * Common part for opening disk images and files
463 static int bdrv_open_common(BlockDriverState
*bs
, const char *filename
,
464 int flags
, BlockDriver
*drv
)
470 trace_bdrv_open_common(bs
, filename
, flags
, drv
->format_name
);
473 bs
->total_sectors
= 0;
477 bs
->open_flags
= flags
;
479 bs
->buffer_alignment
= 512;
481 pstrcpy(bs
->filename
, sizeof(bs
->filename
), filename
);
482 bs
->backing_file
[0] = '\0';
484 if (use_bdrv_whitelist
&& !bdrv_is_whitelisted(drv
)) {
489 bs
->opaque
= g_malloc0(drv
->instance_size
);
491 bs
->enable_write_cache
= !!(flags
& BDRV_O_CACHE_WB
);
494 * Clear flags that are internal to the block layer before opening the
497 open_flags
= flags
& ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
500 * Snapshots should be writable.
502 if (bs
->is_temporary
) {
503 open_flags
|= BDRV_O_RDWR
;
506 bs
->keep_read_only
= bs
->read_only
= !(open_flags
& BDRV_O_RDWR
);
508 /* Open the image, either directly or using a protocol */
509 if (drv
->bdrv_file_open
) {
510 ret
= drv
->bdrv_file_open(bs
, filename
, open_flags
);
512 ret
= bdrv_file_open(&bs
->file
, filename
, open_flags
);
514 ret
= drv
->bdrv_open(bs
, open_flags
);
522 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
528 if (bs
->is_temporary
) {
536 bdrv_delete(bs
->file
);
546 * Opens a file using a protocol (file, host_device, nbd, ...)
548 int bdrv_file_open(BlockDriverState
**pbs
, const char *filename
, int flags
)
550 BlockDriverState
*bs
;
554 drv
= bdrv_find_protocol(filename
);
560 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
571 * Opens a disk image (raw, qcow2, vmdk, ...)
573 int bdrv_open(BlockDriverState
*bs
, const char *filename
, int flags
,
577 char tmp_filename
[PATH_MAX
];
579 if (flags
& BDRV_O_SNAPSHOT
) {
580 BlockDriverState
*bs1
;
583 BlockDriver
*bdrv_qcow2
;
584 QEMUOptionParameter
*options
;
585 char backing_filename
[PATH_MAX
];
587 /* if snapshot, we create a temporary backing file and open it
588 instead of opening 'filename' directly */
590 /* if there is a backing file, use it */
592 ret
= bdrv_open(bs1
, filename
, 0, drv
);
597 total_size
= bdrv_getlength(bs1
) & BDRV_SECTOR_MASK
;
599 if (bs1
->drv
&& bs1
->drv
->protocol_name
)
604 get_tmp_filename(tmp_filename
, sizeof(tmp_filename
));
606 /* Real path is meaningless for protocols */
608 snprintf(backing_filename
, sizeof(backing_filename
),
610 else if (!realpath(filename
, backing_filename
))
613 bdrv_qcow2
= bdrv_find_format("qcow2");
614 options
= parse_option_parameters("", bdrv_qcow2
->create_options
, NULL
);
616 set_option_parameter_int(options
, BLOCK_OPT_SIZE
, total_size
);
617 set_option_parameter(options
, BLOCK_OPT_BACKING_FILE
, backing_filename
);
619 set_option_parameter(options
, BLOCK_OPT_BACKING_FMT
,
623 ret
= bdrv_create(bdrv_qcow2
, tmp_filename
, options
);
624 free_option_parameters(options
);
629 filename
= tmp_filename
;
631 bs
->is_temporary
= 1;
634 /* Find the right image format driver */
636 ret
= find_image_format(filename
, &drv
);
640 goto unlink_and_fail
;
644 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
646 goto unlink_and_fail
;
649 /* If there is a backing file, use it */
650 if ((flags
& BDRV_O_NO_BACKING
) == 0 && bs
->backing_file
[0] != '\0') {
651 char backing_filename
[PATH_MAX
];
653 BlockDriver
*back_drv
= NULL
;
655 bs
->backing_hd
= bdrv_new("");
657 if (path_has_protocol(bs
->backing_file
)) {
658 pstrcpy(backing_filename
, sizeof(backing_filename
),
661 path_combine(backing_filename
, sizeof(backing_filename
),
662 filename
, bs
->backing_file
);
665 if (bs
->backing_format
[0] != '\0') {
666 back_drv
= bdrv_find_format(bs
->backing_format
);
669 /* backing files always opened read-only */
671 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
673 ret
= bdrv_open(bs
->backing_hd
, backing_filename
, back_flags
, back_drv
);
678 if (bs
->is_temporary
) {
679 bs
->backing_hd
->keep_read_only
= !(flags
& BDRV_O_RDWR
);
681 /* base image inherits from "parent" */
682 bs
->backing_hd
->keep_read_only
= bs
->keep_read_only
;
686 if (!bdrv_key_required(bs
)) {
687 bdrv_dev_change_media_cb(bs
, true);
693 if (bs
->is_temporary
) {
699 void bdrv_close(BlockDriverState
*bs
)
702 if (bs
== bs_snapshots
) {
705 if (bs
->backing_hd
) {
706 bdrv_delete(bs
->backing_hd
);
707 bs
->backing_hd
= NULL
;
709 bs
->drv
->bdrv_close(bs
);
712 if (bs
->is_temporary
) {
713 unlink(bs
->filename
);
719 if (bs
->file
!= NULL
) {
720 bdrv_close(bs
->file
);
723 bdrv_dev_change_media_cb(bs
, false);
727 void bdrv_close_all(void)
729 BlockDriverState
*bs
;
731 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
736 /* make a BlockDriverState anonymous by removing from bdrv_state list.
737 Also, NULL terminate the device_name to prevent double remove */
738 void bdrv_make_anon(BlockDriverState
*bs
)
740 if (bs
->device_name
[0] != '\0') {
741 QTAILQ_REMOVE(&bdrv_states
, bs
, list
);
743 bs
->device_name
[0] = '\0';
746 void bdrv_delete(BlockDriverState
*bs
)
750 /* remove from list, if necessary */
754 if (bs
->file
!= NULL
) {
755 bdrv_delete(bs
->file
);
758 assert(bs
!= bs_snapshots
);
762 int bdrv_attach_dev(BlockDriverState
*bs
, void *dev
)
763 /* TODO change to DeviceState *dev when all users are qdevified */
769 bdrv_iostatus_reset(bs
);
773 /* TODO qdevified devices don't use this, remove when devices are qdevified */
774 void bdrv_attach_dev_nofail(BlockDriverState
*bs
, void *dev
)
776 if (bdrv_attach_dev(bs
, dev
) < 0) {
781 void bdrv_detach_dev(BlockDriverState
*bs
, void *dev
)
782 /* TODO change to DeviceState *dev when all users are qdevified */
784 assert(bs
->dev
== dev
);
787 bs
->dev_opaque
= NULL
;
788 bs
->buffer_alignment
= 512;
791 /* TODO change to return DeviceState * when all users are qdevified */
792 void *bdrv_get_attached_dev(BlockDriverState
*bs
)
797 void bdrv_set_dev_ops(BlockDriverState
*bs
, const BlockDevOps
*ops
,
801 bs
->dev_opaque
= opaque
;
802 if (bdrv_dev_has_removable_media(bs
) && bs
== bs_snapshots
) {
807 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
)
809 if (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
) {
810 bs
->dev_ops
->change_media_cb(bs
->dev_opaque
, load
);
814 bool bdrv_dev_has_removable_media(BlockDriverState
*bs
)
816 return !bs
->dev
|| (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
);
819 bool bdrv_dev_is_tray_open(BlockDriverState
*bs
)
821 if (bs
->dev_ops
&& bs
->dev_ops
->is_tray_open
) {
822 return bs
->dev_ops
->is_tray_open(bs
->dev_opaque
);
827 static void bdrv_dev_resize_cb(BlockDriverState
*bs
)
829 if (bs
->dev_ops
&& bs
->dev_ops
->resize_cb
) {
830 bs
->dev_ops
->resize_cb(bs
->dev_opaque
);
834 bool bdrv_dev_is_medium_locked(BlockDriverState
*bs
)
836 if (bs
->dev_ops
&& bs
->dev_ops
->is_medium_locked
) {
837 return bs
->dev_ops
->is_medium_locked(bs
->dev_opaque
);
843 * Run consistency checks on an image
845 * Returns 0 if the check could be completed (it doesn't mean that the image is
846 * free of errors) or -errno when an internal error occurred. The results of the
847 * check are stored in res.
849 int bdrv_check(BlockDriverState
*bs
, BdrvCheckResult
*res
)
851 if (bs
->drv
->bdrv_check
== NULL
) {
855 memset(res
, 0, sizeof(*res
));
856 return bs
->drv
->bdrv_check(bs
, res
);
859 #define COMMIT_BUF_SECTORS 2048
861 /* commit COW file into the raw image */
862 int bdrv_commit(BlockDriverState
*bs
)
864 BlockDriver
*drv
= bs
->drv
;
865 BlockDriver
*backing_drv
;
866 int64_t sector
, total_sectors
;
867 int n
, ro
, open_flags
;
868 int ret
= 0, rw_ret
= 0;
871 BlockDriverState
*bs_rw
, *bs_ro
;
876 if (!bs
->backing_hd
) {
880 if (bs
->backing_hd
->keep_read_only
) {
884 backing_drv
= bs
->backing_hd
->drv
;
885 ro
= bs
->backing_hd
->read_only
;
886 strncpy(filename
, bs
->backing_hd
->filename
, sizeof(filename
));
887 open_flags
= bs
->backing_hd
->open_flags
;
891 bdrv_delete(bs
->backing_hd
);
892 bs
->backing_hd
= NULL
;
893 bs_rw
= bdrv_new("");
894 rw_ret
= bdrv_open(bs_rw
, filename
, open_flags
| BDRV_O_RDWR
,
898 /* try to re-open read-only */
899 bs_ro
= bdrv_new("");
900 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
904 /* drive not functional anymore */
908 bs
->backing_hd
= bs_ro
;
911 bs
->backing_hd
= bs_rw
;
914 total_sectors
= bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
;
915 buf
= g_malloc(COMMIT_BUF_SECTORS
* BDRV_SECTOR_SIZE
);
917 for (sector
= 0; sector
< total_sectors
; sector
+= n
) {
918 if (drv
->bdrv_is_allocated(bs
, sector
, COMMIT_BUF_SECTORS
, &n
)) {
920 if (bdrv_read(bs
, sector
, buf
, n
) != 0) {
925 if (bdrv_write(bs
->backing_hd
, sector
, buf
, n
) != 0) {
932 if (drv
->bdrv_make_empty
) {
933 ret
= drv
->bdrv_make_empty(bs
);
938 * Make sure all data we wrote to the backing device is actually
942 bdrv_flush(bs
->backing_hd
);
949 bdrv_delete(bs
->backing_hd
);
950 bs
->backing_hd
= NULL
;
951 bs_ro
= bdrv_new("");
952 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
956 /* drive not functional anymore */
960 bs
->backing_hd
= bs_ro
;
961 bs
->backing_hd
->keep_read_only
= 0;
967 void bdrv_commit_all(void)
969 BlockDriverState
*bs
;
971 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
979 * -EINVAL - backing format specified, but no file
980 * -ENOSPC - can't update the backing file because no space is left in the
982 * -ENOTSUP - format driver doesn't support changing the backing file
984 int bdrv_change_backing_file(BlockDriverState
*bs
,
985 const char *backing_file
, const char *backing_fmt
)
987 BlockDriver
*drv
= bs
->drv
;
989 if (drv
->bdrv_change_backing_file
!= NULL
) {
990 return drv
->bdrv_change_backing_file(bs
, backing_file
, backing_fmt
);
996 static int bdrv_check_byte_request(BlockDriverState
*bs
, int64_t offset
,
1001 if (!bdrv_is_inserted(bs
))
1007 len
= bdrv_getlength(bs
);
1012 if ((offset
> len
) || (len
- offset
< size
))
1018 static int bdrv_check_request(BlockDriverState
*bs
, int64_t sector_num
,
1021 return bdrv_check_byte_request(bs
, sector_num
* BDRV_SECTOR_SIZE
,
1022 nb_sectors
* BDRV_SECTOR_SIZE
);
1025 typedef struct RwCo
{
1026 BlockDriverState
*bs
;
1034 static void coroutine_fn
bdrv_rw_co_entry(void *opaque
)
1036 RwCo
*rwco
= opaque
;
1038 if (!rwco
->is_write
) {
1039 rwco
->ret
= bdrv_co_do_readv(rwco
->bs
, rwco
->sector_num
,
1040 rwco
->nb_sectors
, rwco
->qiov
);
1042 rwco
->ret
= bdrv_co_do_writev(rwco
->bs
, rwco
->sector_num
,
1043 rwco
->nb_sectors
, rwco
->qiov
);
1048 * Process a synchronous request using coroutines
1050 static int bdrv_rw_co(BlockDriverState
*bs
, int64_t sector_num
, uint8_t *buf
,
1051 int nb_sectors
, bool is_write
)
1054 struct iovec iov
= {
1055 .iov_base
= (void *)buf
,
1056 .iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
,
1061 .sector_num
= sector_num
,
1062 .nb_sectors
= nb_sectors
,
1064 .is_write
= is_write
,
1068 qemu_iovec_init_external(&qiov
, &iov
, 1);
1070 if (qemu_in_coroutine()) {
1071 /* Fast-path if already in coroutine context */
1072 bdrv_rw_co_entry(&rwco
);
1074 co
= qemu_coroutine_create(bdrv_rw_co_entry
);
1075 qemu_coroutine_enter(co
, &rwco
);
1076 while (rwco
.ret
== NOT_DONE
) {
1083 /* return < 0 if error. See bdrv_write() for the return codes */
1084 int bdrv_read(BlockDriverState
*bs
, int64_t sector_num
,
1085 uint8_t *buf
, int nb_sectors
)
1087 return bdrv_rw_co(bs
, sector_num
, buf
, nb_sectors
, false);
1090 static void set_dirty_bitmap(BlockDriverState
*bs
, int64_t sector_num
,
1091 int nb_sectors
, int dirty
)
1094 unsigned long val
, idx
, bit
;
1096 start
= sector_num
/ BDRV_SECTORS_PER_DIRTY_CHUNK
;
1097 end
= (sector_num
+ nb_sectors
- 1) / BDRV_SECTORS_PER_DIRTY_CHUNK
;
1099 for (; start
<= end
; start
++) {
1100 idx
= start
/ (sizeof(unsigned long) * 8);
1101 bit
= start
% (sizeof(unsigned long) * 8);
1102 val
= bs
->dirty_bitmap
[idx
];
1104 if (!(val
& (1UL << bit
))) {
1109 if (val
& (1UL << bit
)) {
1111 val
&= ~(1UL << bit
);
1114 bs
->dirty_bitmap
[idx
] = val
;
1118 /* Return < 0 if error. Important errors are:
1119 -EIO generic I/O error (may happen for all errors)
1120 -ENOMEDIUM No media inserted.
1121 -EINVAL Invalid sector number or nb_sectors
1122 -EACCES Trying to write a read-only device
1124 int bdrv_write(BlockDriverState
*bs
, int64_t sector_num
,
1125 const uint8_t *buf
, int nb_sectors
)
1127 return bdrv_rw_co(bs
, sector_num
, (uint8_t *)buf
, nb_sectors
, true);
1130 int bdrv_pread(BlockDriverState
*bs
, int64_t offset
,
1131 void *buf
, int count1
)
1133 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1134 int len
, nb_sectors
, count
;
1139 /* first read to align to sector start */
1140 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1143 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1145 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1147 memcpy(buf
, tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), len
);
1155 /* read the sectors "in place" */
1156 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1157 if (nb_sectors
> 0) {
1158 if ((ret
= bdrv_read(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1160 sector_num
+= nb_sectors
;
1161 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1166 /* add data from the last sector */
1168 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1170 memcpy(buf
, tmp_buf
, count
);
1175 int bdrv_pwrite(BlockDriverState
*bs
, int64_t offset
,
1176 const void *buf
, int count1
)
1178 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1179 int len
, nb_sectors
, count
;
1184 /* first write to align to sector start */
1185 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1188 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1190 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1192 memcpy(tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), buf
, len
);
1193 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1202 /* write the sectors "in place" */
1203 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1204 if (nb_sectors
> 0) {
1205 if ((ret
= bdrv_write(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1207 sector_num
+= nb_sectors
;
1208 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1213 /* add data from the last sector */
1215 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1217 memcpy(tmp_buf
, buf
, count
);
1218 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1225 * Writes to the file and ensures that no writes are reordered across this
1226 * request (acts as a barrier)
1228 * Returns 0 on success, -errno in error cases.
1230 int bdrv_pwrite_sync(BlockDriverState
*bs
, int64_t offset
,
1231 const void *buf
, int count
)
1235 ret
= bdrv_pwrite(bs
, offset
, buf
, count
);
1240 /* No flush needed for cache modes that use O_DSYNC */
1241 if ((bs
->open_flags
& BDRV_O_CACHE_WB
) != 0) {
1249 * Handle a read request in coroutine context
1251 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
1252 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1254 BlockDriver
*drv
= bs
->drv
;
1259 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1263 return drv
->bdrv_co_readv(bs
, sector_num
, nb_sectors
, qiov
);
1266 int coroutine_fn
bdrv_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
1267 int nb_sectors
, QEMUIOVector
*qiov
)
1269 trace_bdrv_co_readv(bs
, sector_num
, nb_sectors
);
1271 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
);
1275 * Handle a write request in coroutine context
1277 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
1278 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1280 BlockDriver
*drv
= bs
->drv
;
1286 if (bs
->read_only
) {
1289 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1293 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, qiov
);
1295 if (bs
->dirty_bitmap
) {
1296 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
1299 if (bs
->wr_highest_sector
< sector_num
+ nb_sectors
- 1) {
1300 bs
->wr_highest_sector
= sector_num
+ nb_sectors
- 1;
1306 int coroutine_fn
bdrv_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
1307 int nb_sectors
, QEMUIOVector
*qiov
)
1309 trace_bdrv_co_writev(bs
, sector_num
, nb_sectors
);
1311 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, qiov
);
1315 * Truncate file to 'offset' bytes (needed only for file protocols)
1317 int bdrv_truncate(BlockDriverState
*bs
, int64_t offset
)
1319 BlockDriver
*drv
= bs
->drv
;
1323 if (!drv
->bdrv_truncate
)
1327 if (bdrv_in_use(bs
))
1329 ret
= drv
->bdrv_truncate(bs
, offset
);
1331 ret
= refresh_total_sectors(bs
, offset
>> BDRV_SECTOR_BITS
);
1332 bdrv_dev_resize_cb(bs
);
1338 * Length of a allocated file in bytes. Sparse files are counted by actual
1339 * allocated space. Return < 0 if error or unknown.
1341 int64_t bdrv_get_allocated_file_size(BlockDriverState
*bs
)
1343 BlockDriver
*drv
= bs
->drv
;
1347 if (drv
->bdrv_get_allocated_file_size
) {
1348 return drv
->bdrv_get_allocated_file_size(bs
);
1351 return bdrv_get_allocated_file_size(bs
->file
);
1357 * Length of a file in bytes. Return < 0 if error or unknown.
1359 int64_t bdrv_getlength(BlockDriverState
*bs
)
1361 BlockDriver
*drv
= bs
->drv
;
1365 if (bs
->growable
|| bdrv_dev_has_removable_media(bs
)) {
1366 if (drv
->bdrv_getlength
) {
1367 return drv
->bdrv_getlength(bs
);
1370 return bs
->total_sectors
* BDRV_SECTOR_SIZE
;
1373 /* return 0 as number of sectors if no device present or error */
1374 void bdrv_get_geometry(BlockDriverState
*bs
, uint64_t *nb_sectors_ptr
)
1377 length
= bdrv_getlength(bs
);
1381 length
= length
>> BDRV_SECTOR_BITS
;
1382 *nb_sectors_ptr
= length
;
1386 uint8_t boot_ind
; /* 0x80 - active */
1387 uint8_t head
; /* starting head */
1388 uint8_t sector
; /* starting sector */
1389 uint8_t cyl
; /* starting cylinder */
1390 uint8_t sys_ind
; /* What partition type */
1391 uint8_t end_head
; /* end head */
1392 uint8_t end_sector
; /* end sector */
1393 uint8_t end_cyl
; /* end cylinder */
1394 uint32_t start_sect
; /* starting sector counting from 0 */
1395 uint32_t nr_sects
; /* nr of sectors in partition */
1398 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1399 static int guess_disk_lchs(BlockDriverState
*bs
,
1400 int *pcylinders
, int *pheads
, int *psectors
)
1402 uint8_t buf
[BDRV_SECTOR_SIZE
];
1403 int ret
, i
, heads
, sectors
, cylinders
;
1404 struct partition
*p
;
1406 uint64_t nb_sectors
;
1408 bdrv_get_geometry(bs
, &nb_sectors
);
1410 ret
= bdrv_read(bs
, 0, buf
, 1);
1413 /* test msdos magic */
1414 if (buf
[510] != 0x55 || buf
[511] != 0xaa)
1416 for(i
= 0; i
< 4; i
++) {
1417 p
= ((struct partition
*)(buf
+ 0x1be)) + i
;
1418 nr_sects
= le32_to_cpu(p
->nr_sects
);
1419 if (nr_sects
&& p
->end_head
) {
1420 /* We make the assumption that the partition terminates on
1421 a cylinder boundary */
1422 heads
= p
->end_head
+ 1;
1423 sectors
= p
->end_sector
& 63;
1426 cylinders
= nb_sectors
/ (heads
* sectors
);
1427 if (cylinders
< 1 || cylinders
> 16383)
1430 *psectors
= sectors
;
1431 *pcylinders
= cylinders
;
1433 printf("guessed geometry: LCHS=%d %d %d\n",
1434 cylinders
, heads
, sectors
);
1442 void bdrv_guess_geometry(BlockDriverState
*bs
, int *pcyls
, int *pheads
, int *psecs
)
1444 int translation
, lba_detected
= 0;
1445 int cylinders
, heads
, secs
;
1446 uint64_t nb_sectors
;
1448 /* if a geometry hint is available, use it */
1449 bdrv_get_geometry(bs
, &nb_sectors
);
1450 bdrv_get_geometry_hint(bs
, &cylinders
, &heads
, &secs
);
1451 translation
= bdrv_get_translation_hint(bs
);
1452 if (cylinders
!= 0) {
1457 if (guess_disk_lchs(bs
, &cylinders
, &heads
, &secs
) == 0) {
1459 /* if heads > 16, it means that a BIOS LBA
1460 translation was active, so the default
1461 hardware geometry is OK */
1463 goto default_geometry
;
1468 /* disable any translation to be in sync with
1469 the logical geometry */
1470 if (translation
== BIOS_ATA_TRANSLATION_AUTO
) {
1471 bdrv_set_translation_hint(bs
,
1472 BIOS_ATA_TRANSLATION_NONE
);
1477 /* if no geometry, use a standard physical disk geometry */
1478 cylinders
= nb_sectors
/ (16 * 63);
1480 if (cylinders
> 16383)
1482 else if (cylinders
< 2)
1487 if ((lba_detected
== 1) && (translation
== BIOS_ATA_TRANSLATION_AUTO
)) {
1488 if ((*pcyls
* *pheads
) <= 131072) {
1489 bdrv_set_translation_hint(bs
,
1490 BIOS_ATA_TRANSLATION_LARGE
);
1492 bdrv_set_translation_hint(bs
,
1493 BIOS_ATA_TRANSLATION_LBA
);
1497 bdrv_set_geometry_hint(bs
, *pcyls
, *pheads
, *psecs
);
1501 void bdrv_set_geometry_hint(BlockDriverState
*bs
,
1502 int cyls
, int heads
, int secs
)
1509 void bdrv_set_translation_hint(BlockDriverState
*bs
, int translation
)
1511 bs
->translation
= translation
;
1514 void bdrv_get_geometry_hint(BlockDriverState
*bs
,
1515 int *pcyls
, int *pheads
, int *psecs
)
1518 *pheads
= bs
->heads
;
1522 /* Recognize floppy formats */
1523 typedef struct FDFormat
{
1530 static const FDFormat fd_formats
[] = {
1531 /* First entry is default format */
1532 /* 1.44 MB 3"1/2 floppy disks */
1533 { FDRIVE_DRV_144
, 18, 80, 1, },
1534 { FDRIVE_DRV_144
, 20, 80, 1, },
1535 { FDRIVE_DRV_144
, 21, 80, 1, },
1536 { FDRIVE_DRV_144
, 21, 82, 1, },
1537 { FDRIVE_DRV_144
, 21, 83, 1, },
1538 { FDRIVE_DRV_144
, 22, 80, 1, },
1539 { FDRIVE_DRV_144
, 23, 80, 1, },
1540 { FDRIVE_DRV_144
, 24, 80, 1, },
1541 /* 2.88 MB 3"1/2 floppy disks */
1542 { FDRIVE_DRV_288
, 36, 80, 1, },
1543 { FDRIVE_DRV_288
, 39, 80, 1, },
1544 { FDRIVE_DRV_288
, 40, 80, 1, },
1545 { FDRIVE_DRV_288
, 44, 80, 1, },
1546 { FDRIVE_DRV_288
, 48, 80, 1, },
1547 /* 720 kB 3"1/2 floppy disks */
1548 { FDRIVE_DRV_144
, 9, 80, 1, },
1549 { FDRIVE_DRV_144
, 10, 80, 1, },
1550 { FDRIVE_DRV_144
, 10, 82, 1, },
1551 { FDRIVE_DRV_144
, 10, 83, 1, },
1552 { FDRIVE_DRV_144
, 13, 80, 1, },
1553 { FDRIVE_DRV_144
, 14, 80, 1, },
1554 /* 1.2 MB 5"1/4 floppy disks */
1555 { FDRIVE_DRV_120
, 15, 80, 1, },
1556 { FDRIVE_DRV_120
, 18, 80, 1, },
1557 { FDRIVE_DRV_120
, 18, 82, 1, },
1558 { FDRIVE_DRV_120
, 18, 83, 1, },
1559 { FDRIVE_DRV_120
, 20, 80, 1, },
1560 /* 720 kB 5"1/4 floppy disks */
1561 { FDRIVE_DRV_120
, 9, 80, 1, },
1562 { FDRIVE_DRV_120
, 11, 80, 1, },
1563 /* 360 kB 5"1/4 floppy disks */
1564 { FDRIVE_DRV_120
, 9, 40, 1, },
1565 { FDRIVE_DRV_120
, 9, 40, 0, },
1566 { FDRIVE_DRV_120
, 10, 41, 1, },
1567 { FDRIVE_DRV_120
, 10, 42, 1, },
1568 /* 320 kB 5"1/4 floppy disks */
1569 { FDRIVE_DRV_120
, 8, 40, 1, },
1570 { FDRIVE_DRV_120
, 8, 40, 0, },
1571 /* 360 kB must match 5"1/4 better than 3"1/2... */
1572 { FDRIVE_DRV_144
, 9, 80, 0, },
1574 { FDRIVE_DRV_NONE
, -1, -1, 0, },
1577 void bdrv_get_floppy_geometry_hint(BlockDriverState
*bs
, int *nb_heads
,
1578 int *max_track
, int *last_sect
,
1579 FDriveType drive_in
, FDriveType
*drive
)
1581 const FDFormat
*parse
;
1582 uint64_t nb_sectors
, size
;
1583 int i
, first_match
, match
;
1585 bdrv_get_geometry_hint(bs
, nb_heads
, max_track
, last_sect
);
1586 if (*nb_heads
!= 0 && *max_track
!= 0 && *last_sect
!= 0) {
1587 /* User defined disk */
1589 bdrv_get_geometry(bs
, &nb_sectors
);
1592 for (i
= 0; ; i
++) {
1593 parse
= &fd_formats
[i
];
1594 if (parse
->drive
== FDRIVE_DRV_NONE
) {
1597 if (drive_in
== parse
->drive
||
1598 drive_in
== FDRIVE_DRV_NONE
) {
1599 size
= (parse
->max_head
+ 1) * parse
->max_track
*
1601 if (nb_sectors
== size
) {
1605 if (first_match
== -1) {
1611 if (first_match
== -1) {
1614 match
= first_match
;
1616 parse
= &fd_formats
[match
];
1618 *nb_heads
= parse
->max_head
+ 1;
1619 *max_track
= parse
->max_track
;
1620 *last_sect
= parse
->last_sect
;
1621 *drive
= parse
->drive
;
1625 int bdrv_get_translation_hint(BlockDriverState
*bs
)
1627 return bs
->translation
;
1630 void bdrv_set_on_error(BlockDriverState
*bs
, BlockErrorAction on_read_error
,
1631 BlockErrorAction on_write_error
)
1633 bs
->on_read_error
= on_read_error
;
1634 bs
->on_write_error
= on_write_error
;
1637 BlockErrorAction
bdrv_get_on_error(BlockDriverState
*bs
, int is_read
)
1639 return is_read
? bs
->on_read_error
: bs
->on_write_error
;
1642 int bdrv_is_read_only(BlockDriverState
*bs
)
1644 return bs
->read_only
;
1647 int bdrv_is_sg(BlockDriverState
*bs
)
1652 int bdrv_enable_write_cache(BlockDriverState
*bs
)
1654 return bs
->enable_write_cache
;
1657 int bdrv_is_encrypted(BlockDriverState
*bs
)
1659 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
1661 return bs
->encrypted
;
1664 int bdrv_key_required(BlockDriverState
*bs
)
1666 BlockDriverState
*backing_hd
= bs
->backing_hd
;
1668 if (backing_hd
&& backing_hd
->encrypted
&& !backing_hd
->valid_key
)
1670 return (bs
->encrypted
&& !bs
->valid_key
);
1673 int bdrv_set_key(BlockDriverState
*bs
, const char *key
)
1676 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
) {
1677 ret
= bdrv_set_key(bs
->backing_hd
, key
);
1683 if (!bs
->encrypted
) {
1685 } else if (!bs
->drv
|| !bs
->drv
->bdrv_set_key
) {
1688 ret
= bs
->drv
->bdrv_set_key(bs
, key
);
1691 } else if (!bs
->valid_key
) {
1693 /* call the change callback now, we skipped it on open */
1694 bdrv_dev_change_media_cb(bs
, true);
1699 void bdrv_get_format(BlockDriverState
*bs
, char *buf
, int buf_size
)
1704 pstrcpy(buf
, buf_size
, bs
->drv
->format_name
);
1708 void bdrv_iterate_format(void (*it
)(void *opaque
, const char *name
),
1713 QLIST_FOREACH(drv
, &bdrv_drivers
, list
) {
1714 it(opaque
, drv
->format_name
);
1718 BlockDriverState
*bdrv_find(const char *name
)
1720 BlockDriverState
*bs
;
1722 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1723 if (!strcmp(name
, bs
->device_name
)) {
1730 BlockDriverState
*bdrv_next(BlockDriverState
*bs
)
1733 return QTAILQ_FIRST(&bdrv_states
);
1735 return QTAILQ_NEXT(bs
, list
);
1738 void bdrv_iterate(void (*it
)(void *opaque
, BlockDriverState
*bs
), void *opaque
)
1740 BlockDriverState
*bs
;
1742 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1747 const char *bdrv_get_device_name(BlockDriverState
*bs
)
1749 return bs
->device_name
;
1752 void bdrv_flush_all(void)
1754 BlockDriverState
*bs
;
1756 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1757 if (!bdrv_is_read_only(bs
) && bdrv_is_inserted(bs
)) {
1763 int bdrv_has_zero_init(BlockDriverState
*bs
)
1767 if (bs
->drv
->bdrv_has_zero_init
) {
1768 return bs
->drv
->bdrv_has_zero_init(bs
);
1775 * Returns true iff the specified sector is present in the disk image. Drivers
1776 * not implementing the functionality are assumed to not support backing files,
1777 * hence all their sectors are reported as allocated.
1779 * 'pnum' is set to the number of sectors (including and immediately following
1780 * the specified sector) that are known to be in the same
1781 * allocated/unallocated state.
1783 * 'nb_sectors' is the max value 'pnum' should be set to.
1785 int bdrv_is_allocated(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
,
1789 if (!bs
->drv
->bdrv_is_allocated
) {
1790 if (sector_num
>= bs
->total_sectors
) {
1794 n
= bs
->total_sectors
- sector_num
;
1795 *pnum
= (n
< nb_sectors
) ? (n
) : (nb_sectors
);
1798 return bs
->drv
->bdrv_is_allocated(bs
, sector_num
, nb_sectors
, pnum
);
1801 void bdrv_mon_event(const BlockDriverState
*bdrv
,
1802 BlockMonEventAction action
, int is_read
)
1805 const char *action_str
;
1808 case BDRV_ACTION_REPORT
:
1809 action_str
= "report";
1811 case BDRV_ACTION_IGNORE
:
1812 action_str
= "ignore";
1814 case BDRV_ACTION_STOP
:
1815 action_str
= "stop";
1821 data
= qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1824 is_read
? "read" : "write");
1825 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR
, data
);
1827 qobject_decref(data
);
1830 BlockInfoList
*qmp_query_block(Error
**errp
)
1832 BlockInfoList
*head
= NULL
, *cur_item
= NULL
;
1833 BlockDriverState
*bs
;
1835 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1836 BlockInfoList
*info
= g_malloc0(sizeof(*info
));
1838 info
->value
= g_malloc0(sizeof(*info
->value
));
1839 info
->value
->device
= g_strdup(bs
->device_name
);
1840 info
->value
->type
= g_strdup("unknown");
1841 info
->value
->locked
= bdrv_dev_is_medium_locked(bs
);
1842 info
->value
->removable
= bdrv_dev_has_removable_media(bs
);
1844 if (bdrv_dev_has_removable_media(bs
)) {
1845 info
->value
->has_tray_open
= true;
1846 info
->value
->tray_open
= bdrv_dev_is_tray_open(bs
);
1849 if (bdrv_iostatus_is_enabled(bs
)) {
1850 info
->value
->has_io_status
= true;
1851 info
->value
->io_status
= bs
->iostatus
;
1855 info
->value
->has_inserted
= true;
1856 info
->value
->inserted
= g_malloc0(sizeof(*info
->value
->inserted
));
1857 info
->value
->inserted
->file
= g_strdup(bs
->filename
);
1858 info
->value
->inserted
->ro
= bs
->read_only
;
1859 info
->value
->inserted
->drv
= g_strdup(bs
->drv
->format_name
);
1860 info
->value
->inserted
->encrypted
= bs
->encrypted
;
1861 if (bs
->backing_file
[0]) {
1862 info
->value
->inserted
->has_backing_file
= true;
1863 info
->value
->inserted
->backing_file
= g_strdup(bs
->backing_file
);
1867 /* XXX: waiting for the qapi to support GSList */
1869 head
= cur_item
= info
;
1871 cur_item
->next
= info
;
1879 /* Consider exposing this as a full fledged QMP command */
1880 static BlockStats
*qmp_query_blockstat(const BlockDriverState
*bs
, Error
**errp
)
1884 s
= g_malloc0(sizeof(*s
));
1886 if (bs
->device_name
[0]) {
1887 s
->has_device
= true;
1888 s
->device
= g_strdup(bs
->device_name
);
1891 s
->stats
= g_malloc0(sizeof(*s
->stats
));
1892 s
->stats
->rd_bytes
= bs
->nr_bytes
[BDRV_ACCT_READ
];
1893 s
->stats
->wr_bytes
= bs
->nr_bytes
[BDRV_ACCT_WRITE
];
1894 s
->stats
->rd_operations
= bs
->nr_ops
[BDRV_ACCT_READ
];
1895 s
->stats
->wr_operations
= bs
->nr_ops
[BDRV_ACCT_WRITE
];
1896 s
->stats
->wr_highest_offset
= bs
->wr_highest_sector
* BDRV_SECTOR_SIZE
;
1897 s
->stats
->flush_operations
= bs
->nr_ops
[BDRV_ACCT_FLUSH
];
1898 s
->stats
->wr_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_WRITE
];
1899 s
->stats
->rd_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_READ
];
1900 s
->stats
->flush_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_FLUSH
];
1903 s
->has_parent
= true;
1904 s
->parent
= qmp_query_blockstat(bs
->file
, NULL
);
1910 BlockStatsList
*qmp_query_blockstats(Error
**errp
)
1912 BlockStatsList
*head
= NULL
, *cur_item
= NULL
;
1913 BlockDriverState
*bs
;
1915 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1916 BlockStatsList
*info
= g_malloc0(sizeof(*info
));
1917 info
->value
= qmp_query_blockstat(bs
, NULL
);
1919 /* XXX: waiting for the qapi to support GSList */
1921 head
= cur_item
= info
;
1923 cur_item
->next
= info
;
1931 const char *bdrv_get_encrypted_filename(BlockDriverState
*bs
)
1933 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
1934 return bs
->backing_file
;
1935 else if (bs
->encrypted
)
1936 return bs
->filename
;
1941 void bdrv_get_backing_filename(BlockDriverState
*bs
,
1942 char *filename
, int filename_size
)
1944 pstrcpy(filename
, filename_size
, bs
->backing_file
);
1947 int bdrv_write_compressed(BlockDriverState
*bs
, int64_t sector_num
,
1948 const uint8_t *buf
, int nb_sectors
)
1950 BlockDriver
*drv
= bs
->drv
;
1953 if (!drv
->bdrv_write_compressed
)
1955 if (bdrv_check_request(bs
, sector_num
, nb_sectors
))
1958 if (bs
->dirty_bitmap
) {
1959 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
1962 return drv
->bdrv_write_compressed(bs
, sector_num
, buf
, nb_sectors
);
1965 int bdrv_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
1967 BlockDriver
*drv
= bs
->drv
;
1970 if (!drv
->bdrv_get_info
)
1972 memset(bdi
, 0, sizeof(*bdi
));
1973 return drv
->bdrv_get_info(bs
, bdi
);
1976 int bdrv_save_vmstate(BlockDriverState
*bs
, const uint8_t *buf
,
1977 int64_t pos
, int size
)
1979 BlockDriver
*drv
= bs
->drv
;
1982 if (drv
->bdrv_save_vmstate
)
1983 return drv
->bdrv_save_vmstate(bs
, buf
, pos
, size
);
1985 return bdrv_save_vmstate(bs
->file
, buf
, pos
, size
);
1989 int bdrv_load_vmstate(BlockDriverState
*bs
, uint8_t *buf
,
1990 int64_t pos
, int size
)
1992 BlockDriver
*drv
= bs
->drv
;
1995 if (drv
->bdrv_load_vmstate
)
1996 return drv
->bdrv_load_vmstate(bs
, buf
, pos
, size
);
1998 return bdrv_load_vmstate(bs
->file
, buf
, pos
, size
);
2002 void bdrv_debug_event(BlockDriverState
*bs
, BlkDebugEvent event
)
2004 BlockDriver
*drv
= bs
->drv
;
2006 if (!drv
|| !drv
->bdrv_debug_event
) {
2010 return drv
->bdrv_debug_event(bs
, event
);
2014 /**************************************************************/
2015 /* handling of snapshots */
2017 int bdrv_can_snapshot(BlockDriverState
*bs
)
2019 BlockDriver
*drv
= bs
->drv
;
2020 if (!drv
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
2024 if (!drv
->bdrv_snapshot_create
) {
2025 if (bs
->file
!= NULL
) {
2026 return bdrv_can_snapshot(bs
->file
);
2034 int bdrv_is_snapshot(BlockDriverState
*bs
)
2036 return !!(bs
->open_flags
& BDRV_O_SNAPSHOT
);
2039 BlockDriverState
*bdrv_snapshots(void)
2041 BlockDriverState
*bs
;
2044 return bs_snapshots
;
2048 while ((bs
= bdrv_next(bs
))) {
2049 if (bdrv_can_snapshot(bs
)) {
2057 int bdrv_snapshot_create(BlockDriverState
*bs
,
2058 QEMUSnapshotInfo
*sn_info
)
2060 BlockDriver
*drv
= bs
->drv
;
2063 if (drv
->bdrv_snapshot_create
)
2064 return drv
->bdrv_snapshot_create(bs
, sn_info
);
2066 return bdrv_snapshot_create(bs
->file
, sn_info
);
2070 int bdrv_snapshot_goto(BlockDriverState
*bs
,
2071 const char *snapshot_id
)
2073 BlockDriver
*drv
= bs
->drv
;
2078 if (drv
->bdrv_snapshot_goto
)
2079 return drv
->bdrv_snapshot_goto(bs
, snapshot_id
);
2082 drv
->bdrv_close(bs
);
2083 ret
= bdrv_snapshot_goto(bs
->file
, snapshot_id
);
2084 open_ret
= drv
->bdrv_open(bs
, bs
->open_flags
);
2086 bdrv_delete(bs
->file
);
2096 int bdrv_snapshot_delete(BlockDriverState
*bs
, const char *snapshot_id
)
2098 BlockDriver
*drv
= bs
->drv
;
2101 if (drv
->bdrv_snapshot_delete
)
2102 return drv
->bdrv_snapshot_delete(bs
, snapshot_id
);
2104 return bdrv_snapshot_delete(bs
->file
, snapshot_id
);
2108 int bdrv_snapshot_list(BlockDriverState
*bs
,
2109 QEMUSnapshotInfo
**psn_info
)
2111 BlockDriver
*drv
= bs
->drv
;
2114 if (drv
->bdrv_snapshot_list
)
2115 return drv
->bdrv_snapshot_list(bs
, psn_info
);
2117 return bdrv_snapshot_list(bs
->file
, psn_info
);
2121 int bdrv_snapshot_load_tmp(BlockDriverState
*bs
,
2122 const char *snapshot_name
)
2124 BlockDriver
*drv
= bs
->drv
;
2128 if (!bs
->read_only
) {
2131 if (drv
->bdrv_snapshot_load_tmp
) {
2132 return drv
->bdrv_snapshot_load_tmp(bs
, snapshot_name
);
#define NB_SUFFIXES 4

/* Format @size (bytes) into @buf as a short human-readable string
 * ("999", "1.5K", "10K", ...). Returns @buf for call chaining. */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";

    if (size <= 999) {
        /* Small values are printed exactly, with no suffix. */
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    int64_t base = 1024;
    for (int i = 0; i < NB_SUFFIXES; i++) {
        if (size < 10 * base) {
            /* One decimal place while the leading digit is single. */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        }
        if (size < 1000 * base || i == NB_SUFFIXES - 1) {
            /* Round to the nearest whole unit. */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (base >> 1)) / base, suffixes[i]);
            break;
        }
        base *= 1024;
    }
    return buf;
}
2167 char *bdrv_snapshot_dump(char *buf
, int buf_size
, QEMUSnapshotInfo
*sn
)
2169 char buf1
[128], date_buf
[128], clock_buf
[128];
2179 snprintf(buf
, buf_size
,
2180 "%-10s%-20s%7s%20s%15s",
2181 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2185 ptm
= localtime(&ti
);
2186 strftime(date_buf
, sizeof(date_buf
),
2187 "%Y-%m-%d %H:%M:%S", ptm
);
2189 localtime_r(&ti
, &tm
);
2190 strftime(date_buf
, sizeof(date_buf
),
2191 "%Y-%m-%d %H:%M:%S", &tm
);
2193 secs
= sn
->vm_clock_nsec
/ 1000000000;
2194 snprintf(clock_buf
, sizeof(clock_buf
),
2195 "%02d:%02d:%02d.%03d",
2197 (int)((secs
/ 60) % 60),
2199 (int)((sn
->vm_clock_nsec
/ 1000000) % 1000));
2200 snprintf(buf
, buf_size
,
2201 "%-10s%-20s%7s%20s%15s",
2202 sn
->id_str
, sn
->name
,
2203 get_human_readable_size(buf1
, sizeof(buf1
), sn
->vm_state_size
),
2210 /**************************************************************/
2213 BlockDriverAIOCB
*bdrv_aio_readv(BlockDriverState
*bs
, int64_t sector_num
,
2214 QEMUIOVector
*qiov
, int nb_sectors
,
2215 BlockDriverCompletionFunc
*cb
, void *opaque
)
2217 trace_bdrv_aio_readv(bs
, sector_num
, nb_sectors
, opaque
);
2219 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
2223 BlockDriverAIOCB
*bdrv_aio_writev(BlockDriverState
*bs
, int64_t sector_num
,
2224 QEMUIOVector
*qiov
, int nb_sectors
,
2225 BlockDriverCompletionFunc
*cb
, void *opaque
)
2227 trace_bdrv_aio_writev(bs
, sector_num
, nb_sectors
, opaque
);
2229 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
2234 typedef struct MultiwriteCB
{
2239 BlockDriverCompletionFunc
*cb
;
2241 QEMUIOVector
*free_qiov
;
2246 static void multiwrite_user_cb(MultiwriteCB
*mcb
)
2250 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
2251 mcb
->callbacks
[i
].cb(mcb
->callbacks
[i
].opaque
, mcb
->error
);
2252 if (mcb
->callbacks
[i
].free_qiov
) {
2253 qemu_iovec_destroy(mcb
->callbacks
[i
].free_qiov
);
2255 g_free(mcb
->callbacks
[i
].free_qiov
);
2256 qemu_vfree(mcb
->callbacks
[i
].free_buf
);
2260 static void multiwrite_cb(void *opaque
, int ret
)
2262 MultiwriteCB
*mcb
= opaque
;
2264 trace_multiwrite_cb(mcb
, ret
);
2266 if (ret
< 0 && !mcb
->error
) {
2270 mcb
->num_requests
--;
2271 if (mcb
->num_requests
== 0) {
2272 multiwrite_user_cb(mcb
);
2277 static int multiwrite_req_compare(const void *a
, const void *b
)
2279 const BlockRequest
*req1
= a
, *req2
= b
;
2282 * Note that we can't simply subtract req2->sector from req1->sector
2283 * here as that could overflow the return value.
2285 if (req1
->sector
> req2
->sector
) {
2287 } else if (req1
->sector
< req2
->sector
) {
2295 * Takes a bunch of requests and tries to merge them. Returns the number of
2296 * requests that remain after merging.
2298 static int multiwrite_merge(BlockDriverState
*bs
, BlockRequest
*reqs
,
2299 int num_reqs
, MultiwriteCB
*mcb
)
2303 // Sort requests by start sector
2304 qsort(reqs
, num_reqs
, sizeof(*reqs
), &multiwrite_req_compare
);
2306 // Check if adjacent requests touch the same clusters. If so, combine them,
2307 // filling up gaps with zero sectors.
2309 for (i
= 1; i
< num_reqs
; i
++) {
2311 int64_t oldreq_last
= reqs
[outidx
].sector
+ reqs
[outidx
].nb_sectors
;
2313 // This handles the cases that are valid for all block drivers, namely
2314 // exactly sequential writes and overlapping writes.
2315 if (reqs
[i
].sector
<= oldreq_last
) {
2319 // The block driver may decide that it makes sense to combine requests
2320 // even if there is a gap of some sectors between them. In this case,
2321 // the gap is filled with zeros (therefore only applicable for yet
2322 // unused space in format like qcow2).
2323 if (!merge
&& bs
->drv
->bdrv_merge_requests
) {
2324 merge
= bs
->drv
->bdrv_merge_requests(bs
, &reqs
[outidx
], &reqs
[i
]);
2327 if (reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1 > IOV_MAX
) {
2333 QEMUIOVector
*qiov
= g_malloc0(sizeof(*qiov
));
2334 qemu_iovec_init(qiov
,
2335 reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1);
2337 // Add the first request to the merged one. If the requests are
2338 // overlapping, drop the last sectors of the first request.
2339 size
= (reqs
[i
].sector
- reqs
[outidx
].sector
) << 9;
2340 qemu_iovec_concat(qiov
, reqs
[outidx
].qiov
, size
);
2342 // We might need to add some zeros between the two requests
2343 if (reqs
[i
].sector
> oldreq_last
) {
2344 size_t zero_bytes
= (reqs
[i
].sector
- oldreq_last
) << 9;
2345 uint8_t *buf
= qemu_blockalign(bs
, zero_bytes
);
2346 memset(buf
, 0, zero_bytes
);
2347 qemu_iovec_add(qiov
, buf
, zero_bytes
);
2348 mcb
->callbacks
[i
].free_buf
= buf
;
2351 // Add the second request
2352 qemu_iovec_concat(qiov
, reqs
[i
].qiov
, reqs
[i
].qiov
->size
);
2354 reqs
[outidx
].nb_sectors
= qiov
->size
>> 9;
2355 reqs
[outidx
].qiov
= qiov
;
2357 mcb
->callbacks
[i
].free_qiov
= reqs
[outidx
].qiov
;
2360 reqs
[outidx
].sector
= reqs
[i
].sector
;
2361 reqs
[outidx
].nb_sectors
= reqs
[i
].nb_sectors
;
2362 reqs
[outidx
].qiov
= reqs
[i
].qiov
;
2370 * Submit multiple AIO write requests at once.
2372 * On success, the function returns 0 and all requests in the reqs array have
2373 * been submitted. In error case this function returns -1, and any of the
2374 * requests may or may not be submitted yet. In particular, this means that the
2375 * callback will be called for some of the requests, for others it won't. The
2376 * caller must check the error field of the BlockRequest to wait for the right
2377 * callbacks (if error != 0, no callback will be called).
2379 * The implementation may modify the contents of the reqs array, e.g. to merge
2380 * requests. However, the fields opaque and error are left unmodified as they
2381 * are used to signal failure for a single request to the caller.
2383 int bdrv_aio_multiwrite(BlockDriverState
*bs
, BlockRequest
*reqs
, int num_reqs
)
2385 BlockDriverAIOCB
*acb
;
2389 /* don't submit writes if we don't have a medium */
2390 if (bs
->drv
== NULL
) {
2391 for (i
= 0; i
< num_reqs
; i
++) {
2392 reqs
[i
].error
= -ENOMEDIUM
;
2397 if (num_reqs
== 0) {
2401 // Create MultiwriteCB structure
2402 mcb
= g_malloc0(sizeof(*mcb
) + num_reqs
* sizeof(*mcb
->callbacks
));
2403 mcb
->num_requests
= 0;
2404 mcb
->num_callbacks
= num_reqs
;
2406 for (i
= 0; i
< num_reqs
; i
++) {
2407 mcb
->callbacks
[i
].cb
= reqs
[i
].cb
;
2408 mcb
->callbacks
[i
].opaque
= reqs
[i
].opaque
;
2411 // Check for mergable requests
2412 num_reqs
= multiwrite_merge(bs
, reqs
, num_reqs
, mcb
);
2414 trace_bdrv_aio_multiwrite(mcb
, mcb
->num_callbacks
, num_reqs
);
2417 * Run the aio requests. As soon as one request can't be submitted
2418 * successfully, fail all requests that are not yet submitted (we must
2419 * return failure for all requests anyway)
2421 * num_requests cannot be set to the right value immediately: If
2422 * bdrv_aio_writev fails for some request, num_requests would be too high
2423 * and therefore multiwrite_cb() would never recognize the multiwrite
2424 * request as completed. We also cannot use the loop variable i to set it
2425 * when the first request fails because the callback may already have been
2426 * called for previously submitted requests. Thus, num_requests must be
2427 * incremented for each request that is submitted.
2429 * The problem that callbacks may be called early also means that we need
2430 * to take care that num_requests doesn't become 0 before all requests are
2431 * submitted - multiwrite_cb() would consider the multiwrite request
2432 * completed. A dummy request that is "completed" by a manual call to
2433 * multiwrite_cb() takes care of this.
2435 mcb
->num_requests
= 1;
2437 // Run the aio requests
2438 for (i
= 0; i
< num_reqs
; i
++) {
2439 mcb
->num_requests
++;
2440 acb
= bdrv_aio_writev(bs
, reqs
[i
].sector
, reqs
[i
].qiov
,
2441 reqs
[i
].nb_sectors
, multiwrite_cb
, mcb
);
2444 // We can only fail the whole thing if no request has been
2445 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2446 // complete and report the error in the callback.
2448 trace_bdrv_aio_multiwrite_earlyfail(mcb
);
2451 trace_bdrv_aio_multiwrite_latefail(mcb
, i
);
2452 multiwrite_cb(mcb
, -EIO
);
2458 /* Complete the dummy request */
2459 multiwrite_cb(mcb
, 0);
2464 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
2465 reqs
[i
].error
= -EIO
;
2471 void bdrv_aio_cancel(BlockDriverAIOCB
*acb
)
2473 acb
->pool
->cancel(acb
);
2477 /**************************************************************/
2478 /* async block device emulation */
2480 typedef struct BlockDriverAIOCBSync
{
2481 BlockDriverAIOCB common
;
2484 /* vector translation state */
2488 } BlockDriverAIOCBSync
;
2490 static void bdrv_aio_cancel_em(BlockDriverAIOCB
*blockacb
)
2492 BlockDriverAIOCBSync
*acb
=
2493 container_of(blockacb
, BlockDriverAIOCBSync
, common
);
2494 qemu_bh_delete(acb
->bh
);
2496 qemu_aio_release(acb
);
2499 static AIOPool bdrv_em_aio_pool
= {
2500 .aiocb_size
= sizeof(BlockDriverAIOCBSync
),
2501 .cancel
= bdrv_aio_cancel_em
,
2504 static void bdrv_aio_bh_cb(void *opaque
)
2506 BlockDriverAIOCBSync
*acb
= opaque
;
2509 qemu_iovec_from_buffer(acb
->qiov
, acb
->bounce
, acb
->qiov
->size
);
2510 qemu_vfree(acb
->bounce
);
2511 acb
->common
.cb(acb
->common
.opaque
, acb
->ret
);
2512 qemu_bh_delete(acb
->bh
);
2514 qemu_aio_release(acb
);
2517 static BlockDriverAIOCB
*bdrv_aio_rw_vector(BlockDriverState
*bs
,
2521 BlockDriverCompletionFunc
*cb
,
2526 BlockDriverAIOCBSync
*acb
;
2528 acb
= qemu_aio_get(&bdrv_em_aio_pool
, bs
, cb
, opaque
);
2529 acb
->is_write
= is_write
;
2531 acb
->bounce
= qemu_blockalign(bs
, qiov
->size
);
2534 acb
->bh
= qemu_bh_new(bdrv_aio_bh_cb
, acb
);
2537 qemu_iovec_to_buffer(acb
->qiov
, acb
->bounce
);
2538 acb
->ret
= bs
->drv
->bdrv_write(bs
, sector_num
, acb
->bounce
, nb_sectors
);
2540 acb
->ret
= bs
->drv
->bdrv_read(bs
, sector_num
, acb
->bounce
, nb_sectors
);
2543 qemu_bh_schedule(acb
->bh
);
2545 return &acb
->common
;
2548 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
2549 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
2550 BlockDriverCompletionFunc
*cb
, void *opaque
)
2552 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 0);
2555 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
2556 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
2557 BlockDriverCompletionFunc
*cb
, void *opaque
)
2559 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 1);
2563 typedef struct BlockDriverAIOCBCoroutine
{
2564 BlockDriverAIOCB common
;
2568 } BlockDriverAIOCBCoroutine
;
2570 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB
*blockacb
)
2575 static AIOPool bdrv_em_co_aio_pool
= {
2576 .aiocb_size
= sizeof(BlockDriverAIOCBCoroutine
),
2577 .cancel
= bdrv_aio_co_cancel_em
,
2580 static void bdrv_co_em_bh(void *opaque
)
2582 BlockDriverAIOCBCoroutine
*acb
= opaque
;
2584 acb
->common
.cb(acb
->common
.opaque
, acb
->req
.error
);
2585 qemu_bh_delete(acb
->bh
);
2586 qemu_aio_release(acb
);
2589 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2590 static void coroutine_fn
bdrv_co_do_rw(void *opaque
)
2592 BlockDriverAIOCBCoroutine
*acb
= opaque
;
2593 BlockDriverState
*bs
= acb
->common
.bs
;
2595 if (!acb
->is_write
) {
2596 acb
->req
.error
= bdrv_co_do_readv(bs
, acb
->req
.sector
,
2597 acb
->req
.nb_sectors
, acb
->req
.qiov
);
2599 acb
->req
.error
= bdrv_co_do_writev(bs
, acb
->req
.sector
,
2600 acb
->req
.nb_sectors
, acb
->req
.qiov
);
2603 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
2604 qemu_bh_schedule(acb
->bh
);
2607 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
2611 BlockDriverCompletionFunc
*cb
,
2616 BlockDriverAIOCBCoroutine
*acb
;
2618 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
2619 acb
->req
.sector
= sector_num
;
2620 acb
->req
.nb_sectors
= nb_sectors
;
2621 acb
->req
.qiov
= qiov
;
2622 acb
->is_write
= is_write
;
2624 co
= qemu_coroutine_create(bdrv_co_do_rw
);
2625 qemu_coroutine_enter(co
, acb
);
2627 return &acb
->common
;
2630 static void coroutine_fn
bdrv_aio_flush_co_entry(void *opaque
)
2632 BlockDriverAIOCBCoroutine
*acb
= opaque
;
2633 BlockDriverState
*bs
= acb
->common
.bs
;
2635 acb
->req
.error
= bdrv_co_flush(bs
);
2636 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
2637 qemu_bh_schedule(acb
->bh
);
2640 BlockDriverAIOCB
*bdrv_aio_flush(BlockDriverState
*bs
,
2641 BlockDriverCompletionFunc
*cb
, void *opaque
)
2643 trace_bdrv_aio_flush(bs
, opaque
);
2646 BlockDriverAIOCBCoroutine
*acb
;
2648 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
2649 co
= qemu_coroutine_create(bdrv_aio_flush_co_entry
);
2650 qemu_coroutine_enter(co
, acb
);
2652 return &acb
->common
;
2655 static void coroutine_fn
bdrv_aio_discard_co_entry(void *opaque
)
2657 BlockDriverAIOCBCoroutine
*acb
= opaque
;
2658 BlockDriverState
*bs
= acb
->common
.bs
;
2660 acb
->req
.error
= bdrv_co_discard(bs
, acb
->req
.sector
, acb
->req
.nb_sectors
);
2661 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
2662 qemu_bh_schedule(acb
->bh
);
2665 BlockDriverAIOCB
*bdrv_aio_discard(BlockDriverState
*bs
,
2666 int64_t sector_num
, int nb_sectors
,
2667 BlockDriverCompletionFunc
*cb
, void *opaque
)
2670 BlockDriverAIOCBCoroutine
*acb
;
2672 trace_bdrv_aio_discard(bs
, sector_num
, nb_sectors
, opaque
);
2674 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
2675 acb
->req
.sector
= sector_num
;
2676 acb
->req
.nb_sectors
= nb_sectors
;
2677 co
= qemu_coroutine_create(bdrv_aio_discard_co_entry
);
2678 qemu_coroutine_enter(co
, acb
);
2680 return &acb
->common
;
2683 void bdrv_init(void)
2685 module_call_init(MODULE_INIT_BLOCK
);
2688 void bdrv_init_with_whitelist(void)
2690 use_bdrv_whitelist
= 1;
2694 void *qemu_aio_get(AIOPool
*pool
, BlockDriverState
*bs
,
2695 BlockDriverCompletionFunc
*cb
, void *opaque
)
2697 BlockDriverAIOCB
*acb
;
2699 if (pool
->free_aiocb
) {
2700 acb
= pool
->free_aiocb
;
2701 pool
->free_aiocb
= acb
->next
;
2703 acb
= g_malloc0(pool
->aiocb_size
);
2708 acb
->opaque
= opaque
;
2712 void qemu_aio_release(void *p
)
2714 BlockDriverAIOCB
*acb
= (BlockDriverAIOCB
*)p
;
2715 AIOPool
*pool
= acb
->pool
;
2716 acb
->next
= pool
->free_aiocb
;
2717 pool
->free_aiocb
= acb
;
2720 /**************************************************************/
2721 /* Coroutine block device emulation */
2723 typedef struct CoroutineIOCompletion
{
2724 Coroutine
*coroutine
;
2726 } CoroutineIOCompletion
;
2728 static void bdrv_co_io_em_complete(void *opaque
, int ret
)
2730 CoroutineIOCompletion
*co
= opaque
;
2733 qemu_coroutine_enter(co
->coroutine
, NULL
);
2736 static int coroutine_fn
bdrv_co_io_em(BlockDriverState
*bs
, int64_t sector_num
,
2737 int nb_sectors
, QEMUIOVector
*iov
,
2740 CoroutineIOCompletion co
= {
2741 .coroutine
= qemu_coroutine_self(),
2743 BlockDriverAIOCB
*acb
;
2746 acb
= bs
->drv
->bdrv_aio_writev(bs
, sector_num
, iov
, nb_sectors
,
2747 bdrv_co_io_em_complete
, &co
);
2749 acb
= bs
->drv
->bdrv_aio_readv(bs
, sector_num
, iov
, nb_sectors
,
2750 bdrv_co_io_em_complete
, &co
);
2753 trace_bdrv_co_io_em(bs
, sector_num
, nb_sectors
, is_write
, acb
);
2757 qemu_coroutine_yield();
2762 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
2763 int64_t sector_num
, int nb_sectors
,
2766 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, false);
2769 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
2770 int64_t sector_num
, int nb_sectors
,
2773 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, true);
2776 static void coroutine_fn
bdrv_flush_co_entry(void *opaque
)
2778 RwCo
*rwco
= opaque
;
2780 rwco
->ret
= bdrv_co_flush(rwco
->bs
);
2783 int coroutine_fn
bdrv_co_flush(BlockDriverState
*bs
)
2785 if (bs
->open_flags
& BDRV_O_NO_FLUSH
) {
2787 } else if (!bs
->drv
) {
2789 } else if (bs
->drv
->bdrv_co_flush
) {
2790 return bs
->drv
->bdrv_co_flush(bs
);
2791 } else if (bs
->drv
->bdrv_aio_flush
) {
2792 BlockDriverAIOCB
*acb
;
2793 CoroutineIOCompletion co
= {
2794 .coroutine
= qemu_coroutine_self(),
2797 acb
= bs
->drv
->bdrv_aio_flush(bs
, bdrv_co_io_em_complete
, &co
);
2801 qemu_coroutine_yield();
2806 * Some block drivers always operate in either writethrough or unsafe
2807 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
2808 * know how the server works (because the behaviour is hardcoded or
2809 * depends on server-side configuration), so we can't ensure that
2810 * everything is safe on disk. Returning an error doesn't work because
2811 * that would break guests even if the server operates in writethrough
2814 * Let's hope the user knows what he's doing.
2820 int bdrv_flush(BlockDriverState
*bs
)
2828 if (qemu_in_coroutine()) {
2829 /* Fast-path if already in coroutine context */
2830 bdrv_flush_co_entry(&rwco
);
2832 co
= qemu_coroutine_create(bdrv_flush_co_entry
);
2833 qemu_coroutine_enter(co
, &rwco
);
2834 while (rwco
.ret
== NOT_DONE
) {
2842 static void coroutine_fn
bdrv_discard_co_entry(void *opaque
)
2844 RwCo
*rwco
= opaque
;
2846 rwco
->ret
= bdrv_co_discard(rwco
->bs
, rwco
->sector_num
, rwco
->nb_sectors
);
2849 int coroutine_fn
bdrv_co_discard(BlockDriverState
*bs
, int64_t sector_num
,
2854 } else if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
2856 } else if (bs
->read_only
) {
2858 } else if (bs
->drv
->bdrv_co_discard
) {
2859 return bs
->drv
->bdrv_co_discard(bs
, sector_num
, nb_sectors
);
2860 } else if (bs
->drv
->bdrv_aio_discard
) {
2861 BlockDriverAIOCB
*acb
;
2862 CoroutineIOCompletion co
= {
2863 .coroutine
= qemu_coroutine_self(),
2866 acb
= bs
->drv
->bdrv_aio_discard(bs
, sector_num
, nb_sectors
,
2867 bdrv_co_io_em_complete
, &co
);
2871 qemu_coroutine_yield();
2879 int bdrv_discard(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
)
2884 .sector_num
= sector_num
,
2885 .nb_sectors
= nb_sectors
,
2889 if (qemu_in_coroutine()) {
2890 /* Fast-path if already in coroutine context */
2891 bdrv_discard_co_entry(&rwco
);
2893 co
= qemu_coroutine_create(bdrv_discard_co_entry
);
2894 qemu_coroutine_enter(co
, &rwco
);
2895 while (rwco
.ret
== NOT_DONE
) {
2903 /**************************************************************/
2904 /* removable device support */
2907 * Return TRUE if the media is present
2909 int bdrv_is_inserted(BlockDriverState
*bs
)
2911 BlockDriver
*drv
= bs
->drv
;
2915 if (!drv
->bdrv_is_inserted
)
2917 return drv
->bdrv_is_inserted(bs
);
2921 * Return whether the media changed since the last call to this
2922 * function, or -ENOTSUP if we don't know. Most drivers don't know.
2924 int bdrv_media_changed(BlockDriverState
*bs
)
2926 BlockDriver
*drv
= bs
->drv
;
2928 if (drv
&& drv
->bdrv_media_changed
) {
2929 return drv
->bdrv_media_changed(bs
);
2935 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2937 void bdrv_eject(BlockDriverState
*bs
, int eject_flag
)
2939 BlockDriver
*drv
= bs
->drv
;
2941 if (drv
&& drv
->bdrv_eject
) {
2942 drv
->bdrv_eject(bs
, eject_flag
);
2947 * Lock or unlock the media (if it is locked, the user won't be able
2948 * to eject it manually).
2950 void bdrv_lock_medium(BlockDriverState
*bs
, bool locked
)
2952 BlockDriver
*drv
= bs
->drv
;
2954 trace_bdrv_lock_medium(bs
, locked
);
2956 if (drv
&& drv
->bdrv_lock_medium
) {
2957 drv
->bdrv_lock_medium(bs
, locked
);
2961 /* needed for generic scsi interface */
2963 int bdrv_ioctl(BlockDriverState
*bs
, unsigned long int req
, void *buf
)
2965 BlockDriver
*drv
= bs
->drv
;
2967 if (drv
&& drv
->bdrv_ioctl
)
2968 return drv
->bdrv_ioctl(bs
, req
, buf
);
2972 BlockDriverAIOCB
*bdrv_aio_ioctl(BlockDriverState
*bs
,
2973 unsigned long int req
, void *buf
,
2974 BlockDriverCompletionFunc
*cb
, void *opaque
)
2976 BlockDriver
*drv
= bs
->drv
;
2978 if (drv
&& drv
->bdrv_aio_ioctl
)
2979 return drv
->bdrv_aio_ioctl(bs
, req
, buf
, cb
, opaque
);
2983 void bdrv_set_buffer_alignment(BlockDriverState
*bs
, int align
)
2985 bs
->buffer_alignment
= align
;
2988 void *qemu_blockalign(BlockDriverState
*bs
, size_t size
)
2990 return qemu_memalign((bs
&& bs
->buffer_alignment
) ? bs
->buffer_alignment
: 512, size
);
2993 void bdrv_set_dirty_tracking(BlockDriverState
*bs
, int enable
)
2995 int64_t bitmap_size
;
2997 bs
->dirty_count
= 0;
2999 if (!bs
->dirty_bitmap
) {
3000 bitmap_size
= (bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
) +
3001 BDRV_SECTORS_PER_DIRTY_CHUNK
* 8 - 1;
3002 bitmap_size
/= BDRV_SECTORS_PER_DIRTY_CHUNK
* 8;
3004 bs
->dirty_bitmap
= g_malloc0(bitmap_size
);
3007 if (bs
->dirty_bitmap
) {
3008 g_free(bs
->dirty_bitmap
);
3009 bs
->dirty_bitmap
= NULL
;
3014 int bdrv_get_dirty(BlockDriverState
*bs
, int64_t sector
)
3016 int64_t chunk
= sector
/ (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK
;
3018 if (bs
->dirty_bitmap
&&
3019 (sector
<< BDRV_SECTOR_BITS
) < bdrv_getlength(bs
)) {
3020 return !!(bs
->dirty_bitmap
[chunk
/ (sizeof(unsigned long) * 8)] &
3021 (1UL << (chunk
% (sizeof(unsigned long) * 8))));
3027 void bdrv_reset_dirty(BlockDriverState
*bs
, int64_t cur_sector
,
3030 set_dirty_bitmap(bs
, cur_sector
, nr_sectors
, 0);
3033 int64_t bdrv_get_dirty_count(BlockDriverState
*bs
)
3035 return bs
->dirty_count
;
3038 void bdrv_set_in_use(BlockDriverState
*bs
, int in_use
)
3040 assert(bs
->in_use
!= in_use
);
3041 bs
->in_use
= in_use
;
3044 int bdrv_in_use(BlockDriverState
*bs
)
3049 void bdrv_iostatus_enable(BlockDriverState
*bs
)
3051 bs
->iostatus_enabled
= true;
3052 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
3055 /* The I/O status is only enabled if the drive explicitly
3056 * enables it _and_ the VM is configured to stop on errors */
3057 bool bdrv_iostatus_is_enabled(const BlockDriverState
*bs
)
3059 return (bs
->iostatus_enabled
&&
3060 (bs
->on_write_error
== BLOCK_ERR_STOP_ENOSPC
||
3061 bs
->on_write_error
== BLOCK_ERR_STOP_ANY
||
3062 bs
->on_read_error
== BLOCK_ERR_STOP_ANY
));
3065 void bdrv_iostatus_disable(BlockDriverState
*bs
)
3067 bs
->iostatus_enabled
= false;
3070 void bdrv_iostatus_reset(BlockDriverState
*bs
)
3072 if (bdrv_iostatus_is_enabled(bs
)) {
3073 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
3077 /* XXX: Today this is set by device models because it makes the implementation
3078 quite simple. However, the block layer knows about the error, so it's
3079 possible to implement this without device models being involved */
3080 void bdrv_iostatus_set_err(BlockDriverState
*bs
, int error
)
3082 if (bdrv_iostatus_is_enabled(bs
) &&
3083 bs
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
3085 bs
->iostatus
= error
== ENOSPC
? BLOCK_DEVICE_IO_STATUS_NOSPACE
:
3086 BLOCK_DEVICE_IO_STATUS_FAILED
;
3091 bdrv_acct_start(BlockDriverState
*bs
, BlockAcctCookie
*cookie
, int64_t bytes
,
3092 enum BlockAcctType type
)
3094 assert(type
< BDRV_MAX_IOTYPE
);
3096 cookie
->bytes
= bytes
;
3097 cookie
->start_time_ns
= get_clock();
3098 cookie
->type
= type
;
3102 bdrv_acct_done(BlockDriverState
*bs
, BlockAcctCookie
*cookie
)
3104 assert(cookie
->type
< BDRV_MAX_IOTYPE
);
3106 bs
->nr_bytes
[cookie
->type
] += cookie
->bytes
;
3107 bs
->nr_ops
[cookie
->type
]++;
3108 bs
->total_time_ns
[cookie
->type
] += get_clock() - cookie
->start_time_ns
;
3111 int bdrv_img_create(const char *filename
, const char *fmt
,
3112 const char *base_filename
, const char *base_fmt
,
3113 char *options
, uint64_t img_size
, int flags
)
3115 QEMUOptionParameter
*param
= NULL
, *create_options
= NULL
;
3116 QEMUOptionParameter
*backing_fmt
, *backing_file
, *size
;
3117 BlockDriverState
*bs
= NULL
;
3118 BlockDriver
*drv
, *proto_drv
;
3119 BlockDriver
*backing_drv
= NULL
;
3122 /* Find driver and parse its options */
3123 drv
= bdrv_find_format(fmt
);
3125 error_report("Unknown file format '%s'", fmt
);
3130 proto_drv
= bdrv_find_protocol(filename
);
3132 error_report("Unknown protocol '%s'", filename
);
3137 create_options
= append_option_parameters(create_options
,
3138 drv
->create_options
);
3139 create_options
= append_option_parameters(create_options
,
3140 proto_drv
->create_options
);
3142 /* Create parameter list with default values */
3143 param
= parse_option_parameters("", create_options
, param
);
3145 set_option_parameter_int(param
, BLOCK_OPT_SIZE
, img_size
);
3147 /* Parse -o options */
3149 param
= parse_option_parameters(options
, create_options
, param
);
3150 if (param
== NULL
) {
3151 error_report("Invalid options for file format '%s'.", fmt
);
3157 if (base_filename
) {
3158 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FILE
,
3160 error_report("Backing file not supported for file format '%s'",
3168 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FMT
, base_fmt
)) {
3169 error_report("Backing file format not supported for file "
3170 "format '%s'", fmt
);
3176 backing_file
= get_option_parameter(param
, BLOCK_OPT_BACKING_FILE
);
3177 if (backing_file
&& backing_file
->value
.s
) {
3178 if (!strcmp(filename
, backing_file
->value
.s
)) {
3179 error_report("Error: Trying to create an image with the "
3180 "same filename as the backing file");
3186 backing_fmt
= get_option_parameter(param
, BLOCK_OPT_BACKING_FMT
);
3187 if (backing_fmt
&& backing_fmt
->value
.s
) {
3188 backing_drv
= bdrv_find_format(backing_fmt
->value
.s
);
3190 error_report("Unknown backing file format '%s'",
3191 backing_fmt
->value
.s
);
3197 // The size for the image must always be specified, with one exception:
3198 // If we are using a backing file, we can obtain the size from there
3199 size
= get_option_parameter(param
, BLOCK_OPT_SIZE
);
3200 if (size
&& size
->value
.n
== -1) {
3201 if (backing_file
&& backing_file
->value
.s
) {
3207 ret
= bdrv_open(bs
, backing_file
->value
.s
, flags
, backing_drv
);
3209 error_report("Could not open '%s'", backing_file
->value
.s
);
3212 bdrv_get_geometry(bs
, &size
);
3215 snprintf(buf
, sizeof(buf
), "%" PRId64
, size
);
3216 set_option_parameter(param
, BLOCK_OPT_SIZE
, buf
);
3218 error_report("Image creation needs a size parameter");
3224 printf("Formatting '%s', fmt=%s ", filename
, fmt
);
3225 print_option_parameters(param
);
3228 ret
= bdrv_create(drv
, filename
, param
);
3231 if (ret
== -ENOTSUP
) {
3232 error_report("Formatting or formatting option not supported for "
3233 "file format '%s'", fmt
);
3234 } else if (ret
== -EFBIG
) {
3235 error_report("The image size is too large for file format '%s'",
3238 error_report("%s: error while creating %s: %s", filename
, fmt
,
3244 free_option_parameters(create_options
);
3245 free_option_parameters(param
);