/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "module.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#endif

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);
/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end   = 0;
    bs->slice_time  = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}
static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end   = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}
bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
         || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
         || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
         || io_limits->iops[BLOCK_IO_LIMIT_READ]
         || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
         || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* We aim to keep each request's timing in FIFO order: the next
     * throttled request is not dequeued until the current request has been
     * allowed to proceed.  So if the current request still exceeds the
     * limits, it is re-inserted at the head, and all requests behind it
     * remain in the throttled_reqs queue.
     */
    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    if (!drv->bdrv_create)
        return -ENOTSUP;

    return drv->bdrv_create(filename, options);
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}

#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}

BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}

static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}
/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif

    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }

    bs->growable = 1;
    *pbs = bs;
    return 0;
}
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;

    qemu_aio_flush();

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}

/* make a BlockDriverState anonymous by removing from bdrv_state list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}

void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}
int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}

static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
    }
}

bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}

static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}

bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}

/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of the
 * check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}
#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as read-write */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
            backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* re-open as read-only */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
            backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}

void bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
    }
}
struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}

/**
 * Round a region to cluster boundaries
 */
static void round_to_clusters(BlockDriverState *bs,
                              int64_t sector_num, int nb_sectors,
                              int64_t *cluster_sector_num,
                              int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t sector_num, int nb_sectors) {
    if (sector_num >= req->sector_num + req->nb_sectors) {
        return false;
    }
    if (req->sector_num >= sector_num + nb_sectors) {
        return false;
    }
    return true;
}

static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap.  This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster.  For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests.  This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_change_backing_file != NULL) {
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        return -ENOTSUP;
    }
}

static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov);
    }
}

/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}

static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}

int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}

/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}
static int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file.  This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors,
                                cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                                 &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests.  If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (bs->copy_on_read) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);
    return ret;
}

int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
}
/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    if (bs->copy_on_read) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}

int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
}
/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dev_resize_cb(bs);
    }
    return ret;
}

/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}

/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}

/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t length;
    length = bdrv_getlength(bs);
    if (length < 0)
        length = 0;
    else
        length = length >> BDRV_SECTOR_BITS;
    *nb_sectors_ptr = length;
}
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;

/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}

void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
/* throttling disk io limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    bs->io_limits = *io_limits;
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;
    uint8_t max_track;
    uint8_t max_head;
} FDFormat;

static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};

void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}

void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}

BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}

int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}

int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}

int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}

void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}
BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}

void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}

const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}

void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
            bdrv_flush(bs);
        }
    }
}
int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}

typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;
    int ret;
    bool done;
} BdrvCoIsAllocatedData;

/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    int64_t n;

    if (sector_num >= bs->total_sectors) {
        *pnum = 0;
        return 0;
    }

    n = bs->total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    if (!bs->drv->bdrv_co_is_allocated) {
        *pnum = nb_sectors;
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}
/* Coroutine wrapper for bdrv_is_allocated() */
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
{
    BdrvCoIsAllocatedData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
                                     data->pnum);
    data->done = true;
}

/*
 * Synchronous wrapper around bdrv_co_is_allocated().
 *
 * See bdrv_co_is_allocated() for details.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    Coroutine *co;
    BdrvCoIsAllocatedData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
    qemu_coroutine_enter(co, &data);
    while (!data.done) {
        qemu_aio_wait();
    }
    return data.ret;
}
void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}
BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *info = g_malloc0(sizeof(*info));

        info->value = g_malloc0(sizeof(*info->value));
        info->value->device = g_strdup(bs->device_name);
        info->value->type = g_strdup("unknown");
        info->value->locked = bdrv_dev_is_medium_locked(bs);
        info->value->removable = bdrv_dev_has_removable_media(bs);

        if (bdrv_dev_has_removable_media(bs)) {
            info->value->has_tray_open = true;
            info->value->tray_open = bdrv_dev_is_tray_open(bs);
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            info->value->has_io_status = true;
            info->value->io_status = bs->iostatus;
        }

        if (bs->drv) {
            info->value->has_inserted = true;
            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
            info->value->inserted->file = g_strdup(bs->filename);
            info->value->inserted->ro = bs->read_only;
            info->value->inserted->drv = g_strdup(bs->drv->format_name);
            info->value->inserted->encrypted = bs->encrypted;
            if (bs->backing_file[0]) {
                info->value->inserted->has_backing_file = true;
                info->value->inserted->backing_file = g_strdup(bs->backing_file);
            }

            if (bs->io_limits_enabled) {
                info->value->inserted->bps =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->bps_rd =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
                info->value->inserted->bps_wr =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
                info->value->inserted->iops =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->iops_rd =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
                info->value->inserted->iops_wr =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
            }
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
/* Consider exposing this as a full fledged QMP command */
static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
{
    BlockStats *s;

    s = g_malloc0(sizeof(*s));

    if (bs->device_name[0]) {
        s->has_device = true;
        s->device = g_strdup(bs->device_name);
    }

    s->stats = g_malloc0(sizeof(*s->stats));
    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];

    if (bs->file) {
        s->has_parent = true;
        s->parent = qmp_query_blockstat(bs->file, NULL);
    }

    return s;
}
BlockStatsList *qmp_query_blockstats(Error **errp)
{
    BlockStatsList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
        info->value = qmp_query_blockstat(bs, NULL);

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return bs->backing_file;
    else if (bs->encrypted)
        return bs->filename;
    else
        return NULL;
}

void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}

int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_write_compressed)
        return -ENOTSUP;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}

int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_get_info)
        return -ENOTSUP;
    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_save_vmstate)
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_save_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}

int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_load_vmstate)
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}

void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    drv->bdrv_debug_event(bs, event);
}
/**************************************************************/
/* handling of snapshots */

int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (!drv->bdrv_snapshot_create) {
        if (bs->file != NULL) {
            return bdrv_can_snapshot(bs->file);
        }
        return 0;
    }

    return 1;
}

int bdrv_is_snapshot(BlockDriverState *bs)
{
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
}

BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    if (bs_snapshots) {
        return bs_snapshots;
    }

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}

int bdrv_snapshot_create(BlockDriverState *bs,
                         QEMUSnapshotInfo *sn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_create)
        return drv->bdrv_snapshot_create(bs, sn_info);
    if (bs->file)
        return bdrv_snapshot_create(bs->file, sn_info);
    return -ENOTSUP;
}
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_goto)
        return drv->bdrv_snapshot_goto(bs, snapshot_id);

    if (bs->file) {
        drv->bdrv_close(bs);
        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
        open_ret = drv->bdrv_open(bs, bs->open_flags);
        if (open_ret < 0) {
            bdrv_delete(bs->file);
            bs->drv = NULL;
            return open_ret;
        }
        return ret;
    }

    return -ENOTSUP;
}

int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_delete)
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    if (bs->file)
        return bdrv_snapshot_delete(bs->file, snapshot_id);
    return -ENOTSUP;
}

int bdrv_snapshot_list(BlockDriverState *bs,
                       QEMUSnapshotInfo **psn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_list)
        return drv->bdrv_snapshot_list(bs, psn_info);
    if (bs->file)
        return bdrv_snapshot_list(bs->file, psn_info);
    return -ENOTSUP;
}
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
        const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (drv->bdrv_snapshot_load_tmp) {
        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
    }
    return -ENOTSUP;
}

#define NB_SUFFIXES 4

char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
    } else {
        base = 1024;
        for(i = 0; i < NB_SUFFIXES; i++) {
            if (size < (10 * base)) {
                snprintf(buf, buf_size, "%0.1f%c",
                         (double)size / base,
                         suffixes[i]);
                break;
            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
                snprintf(buf, buf_size, "%" PRId64 "%c",
                         ((size + (base >> 1)) / base),
                         suffixes[i]);
                break;
            }
            base = base * 1024;
        }
    }
    return buf;
}
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}
/**************************************************************/
/* async I/Os */

BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, false);
}

BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, true);
}
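/* bdrv_aio_multiwrite() support: a MultiwriteCB fans a single completion out
 * to the original per-request callbacks.  num_callbacks counts the guest
 * requests; num_requests counts the (possibly smaller) number of merged
 * requests still in flight. */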
typedef struct MultiwriteCB {
    int error;
    int num_requests;
    int num_callbacks;
    struct {
        BlockDriverCompletionFunc *cb;
        void *opaque;
        QEMUIOVector *free_qiov;
        void *free_buf;
    } callbacks[];
} MultiwriteCB;

static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int i;

    for (i = 0; i < mcb->num_callbacks; i++) {
        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
        if (mcb->callbacks[i].free_qiov) {
            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
        }
        g_free(mcb->callbacks[i].free_qiov);
        qemu_vfree(mcb->callbacks[i].free_buf);
    }
}

static void multiwrite_cb(void *opaque, int ret)
{
    MultiwriteCB *mcb = opaque;

    trace_multiwrite_cb(mcb, ret);

    if (ret < 0 && !mcb->error) {
        mcb->error = ret;
    }

    mcb->num_requests--;
    if (mcb->num_requests == 0) {
        multiwrite_user_cb(mcb);
        g_free(mcb);
    }
}

static int multiwrite_req_compare(const void *a, const void *b)
{
    const BlockRequest *req1 = a, *req2 = b;

    /*
     * Note that we can't simply subtract req2->sector from req1->sector
     * here as that could overflow the return value.
     */
    if (req1->sector > req2->sector) {
        return 1;
    } else if (req1->sector < req2->sector) {
        return -1;
    } else {
        return 0;
    }
}
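/* Illustrative example of the merging below: a write of sectors [0, 8) and
 * one of sectors [8, 16) become a single request covering [0, 16).  If the
 * driver accepts a gap (say [0, 8) and [10, 16)), the missing sectors 8-9
 * are filled with zeros from a temporary buffer that is freed on
 * completion. */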
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // This handles the cases that are valid for all block drivers, namely
        // exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in formats like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
        }

        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                mcb->callbacks[i].free_buf = buf;
            }

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergeable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
            reqs[i].nb_sectors, multiwrite_cb, mcb);
    }

    return 0;
}

void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->pool->cancel(acb);
}
/* block I/O throttling */
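/* Throttling is based on time slices: the I/O already accounted to the
 * current slice is compared with what the configured limit allows for the
 * slice's length.  Illustrative numbers: with bps=1048576 (1 MB/s) and a
 * 0.1 s slice, bytes_limit comes to about 104858 bytes, and a request that
 * would exceed it is delayed by roughly
 * (bytes_base + bytes_res) / bps_limit - elapsed_time seconds. */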
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double   bytes_limit, bytes_base, bytes_res;
    double   slice_time, wait_time;

    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; and
     *             it is obtained from the history statistic info.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calculate
     *             the total time for completing reading/writing all data.
     */
    bytes_res   = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end needs to be extended so that the current statistics
     * are kept until the timer fires; the factor below was tuned
     * experimentally.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}

static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                             double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double   ios_limit, ios_base;
    double   slice_time, wait_time;

    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit  = iops_limit * slice_time;
    ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
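/* Combined admission check: evaluates the bps and iops limits separately,
 * reports the longer of the two computed delays, and extends the current
 * slice so that the accumulated statistics survive until the throttling
 * timer fires. */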
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                           bool is_write, int64_t *wait)
{
    int64_t  now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double   elapsed_time;
    int      bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        bs->slice_end = now + bs->slice_time;
    } else {
        bs->slice_time  =  5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
    }

    elapsed_time  = now - bs->slice_start;
    elapsed_time  /= (NANOSECONDS_PER_SECOND);

    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
                                      is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                       elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
/**************************************************************/
/* async block device emulation */

typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;
    int is_write;
} BlockDriverAIOCBSync;

static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};

static void bdrv_aio_bh_cb(void *opaque)
{
    BlockDriverAIOCBSync *acb = opaque;

    if (!acb->is_write)
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}
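/* Emulates AIO on top of a driver's synchronous bdrv_read/bdrv_write: the
 * request is carried out synchronously through a bounce buffer, and
 * completion is reported from a bottom half so the callback never runs
 * before the submission function has returned. */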
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}

static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
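/* The coroutine-based counterpart: emulates the AIO interface for drivers
 * that only provide coroutine read/write functions by spawning a coroutine
 * for the request and completing the AIOCB from a bottom half when it
 * finishes. */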
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;
    bool is_write;
    QEMUBH *bh;
} BlockDriverAIOCBCoroutine;

static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}

static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};

static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}

/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    }

    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_flush(bs, opaque);

    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
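/* AIOCBs are pooled per AIOPool: qemu_aio_release() pushes a control block
 * onto a singly linked free list (acb->next) and qemu_aio_get() pops from it
 * before falling back to a fresh allocation, since an AIOCB is needed for
 * every request. */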
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
    } else {
        acb = g_malloc0(pool->aiocb_size);
        acb->pool = pool;
    }
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}

void qemu_aio_release(void *p)
{
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
    AIOPool *pool = acb->pool;
    acb->next = pool->free_aiocb;
    pool->free_aiocb = acb;
}
/**************************************************************/
/* Coroutine block device emulation */

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
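/* Bridges a coroutine caller onto a driver's callback-based AIO interface:
 * the coroutine submits the request, yields, and is re-entered by
 * bdrv_co_io_em_complete() once the result has been stored in co->ret. */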
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();

    return co.ret;
}

static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
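/* Flushing happens in two stages below: data is first written back to the
 * host OS (done even with cache=unsafe) and only then, unless
 * BDRV_O_NO_FLUSH is set, forced out to the physical disk. */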
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs->drv) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        return bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what he's doing.
         */
        return 0;
    }
}
void bdrv_invalidate_cache(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs);
    }
}

void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}
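/* Synchronous wrapper around bdrv_co_flush(): when already in coroutine
 * context the entry function is called directly; otherwise a coroutine is
 * spawned and qemu_aio_wait() is polled until rwco.ret signals
 * completion. */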
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}

int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        return 0;
    }
}
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 */
int bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv)
        return 0;
    if (!drv->bdrv_is_inserted)
        return 1;
    return drv->bdrv_is_inserted(bs);
}

/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know. Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}

/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray.
 */
void bdrv_eject(BlockDriverState *bs, int eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }
}

/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}
/* needed for generic scsi interface */

int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_ioctl)
        return drv->bdrv_ioctl(bs, req, buf);
    return -ENOTSUP;
}

BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_aio_ioctl)
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
    return NULL;
}
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}

void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign((bs && bs->buffer_alignment) ?
                         bs->buffer_alignment : 512, size);
}
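/* Dirty tracking groups BDRV_SECTORS_PER_DIRTY_CHUNK sectors into one bitmap
 * bit, i.e. one bitmap byte covers 8 chunks.  The sizing below therefore
 * rounds the device length up to a whole number of 8-chunk groups before
 * dividing. */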
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            bs->dirty_bitmap = g_malloc0(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}

int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}

int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}

int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}

/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
            bs->on_read_error == BLOCK_ERR_STOP_ANY));
}

void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}

void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}
/* XXX: Today this is set by device models because it makes the implementation
   quite simple. However, the block layer knows about the error, so it's
   possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    if (bdrv_iostatus_is_enabled(bs) &&
        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        assert(error >= 0);
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
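/* I/O accounting: device models bracket each request with bdrv_acct_start()
 * and bdrv_acct_done(), keeping the BlockAcctCookie in their own request
 * state; the cookie carries the byte count, start timestamp and I/O type
 * until completion. */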
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}

void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
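/* Creates a new disk image.  Illustrative call (hypothetical flag value),
 * roughly equivalent to "qemu-img create -f qcow2 test.qcow2 1G":
 *
 *   bdrv_img_create("test.qcow2", "qcow2", NULL, NULL, NULL,
 *                   (uint64_t)1 << 30, 0);
 *
 * A flags value of 0 is used here only for illustration; real callers pass
 * their own BDRV_O_* open flags. */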
int bdrv_img_create(const char *filename, const char *fmt,
                    const char *base_filename, const char *base_fmt,
                    char *options, uint64_t img_size, int flags)
{
    QEMUOptionParameter *param = NULL, *create_options = NULL;
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
    BlockDriverState *bs = NULL;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_report("Unknown file format '%s'", fmt);
        ret = -EINVAL;
        goto out;
    }

    proto_drv = bdrv_find_protocol(filename);
    if (!proto_drv) {
        error_report("Unknown protocol '%s'", filename);
        ret = -EINVAL;
        goto out;
    }

    create_options = append_option_parameters(create_options,
                                              drv->create_options);
    create_options = append_option_parameters(create_options,
                                              proto_drv->create_options);

    /* Create parameter list with default values */
    param = parse_option_parameters("", create_options, param);

    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        param = parse_option_parameters(options, create_options, param);
        if (param == NULL) {
            error_report("Invalid options for file format '%s'.", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_filename) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                                 base_filename)) {
            error_report("Backing file not supported for file format '%s'",
                         fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_fmt) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_report("Backing file format not supported for file "
                         "format '%s'", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
    if (backing_file && backing_file->value.s) {
        if (!strcmp(filename, backing_file->value.s)) {
            error_report("Error: Trying to create an image with the "
                         "same filename as the backing file");
            ret = -EINVAL;
            goto out;
        }
    }

    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt && backing_fmt->value.s) {
        backing_drv = bdrv_find_format(backing_fmt->value.s);
        if (!backing_drv) {
            error_report("Unknown backing file format '%s'",
                         backing_fmt->value.s);
            ret = -EINVAL;
            goto out;
        }
    }

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
    if (size && size->value.n == -1) {
        if (backing_file && backing_file->value.s) {
            uint64_t size;
            char buf[32];

            bs = bdrv_new("");

            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
            if (ret < 0) {
                error_report("Could not open '%s'", backing_file->value.s);
                goto out;
            }
            bdrv_get_geometry(bs, &size);
            size *= 512;

            snprintf(buf, sizeof(buf), "%" PRId64, size);
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
        } else {
            error_report("Image creation needs a size parameter");
            ret = -EINVAL;
            goto out;
        }
    }

    printf("Formatting '%s', fmt=%s ", filename, fmt);
    print_option_parameters(param);
    puts("");

    ret = bdrv_create(drv, filename, param);

    if (ret < 0) {
        if (ret == -ENOTSUP) {
            error_report("Formatting or formatting option not supported for "
                         "file format '%s'", fmt);
        } else if (ret == -EFBIG) {
            error_report("The image size is too large for file format '%s'",
                         fmt);
        } else {
            error_report("%s: error while creating %s: %s", filename, fmt,
                         strerror(-ret));
        }
    }

out:
    free_option_parameters(create_options);
    free_option_parameters(param);

    if (bs) {
        bdrv_delete(bs);
    }

    return ret;
}