2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 #include "config-host.h"
25 #include "qemu-common.h"
28 #include "block_int.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
33 #include "qemu-timer.h"
36 #include <sys/types.h>
38 #include <sys/ioctl.h>
39 #include <sys/queue.h>
49 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
52 BDRV_REQ_COPY_ON_READ
= 0x1,
53 BDRV_REQ_ZERO_WRITE
= 0x2,
56 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
);
57 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
58 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
59 BlockDriverCompletionFunc
*cb
, void *opaque
);
60 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
61 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
62 BlockDriverCompletionFunc
*cb
, void *opaque
);
63 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
64 int64_t sector_num
, int nb_sectors
,
66 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
67 int64_t sector_num
, int nb_sectors
,
69 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
70 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
71 BdrvRequestFlags flags
);
72 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
73 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
74 BdrvRequestFlags flags
);
75 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
79 BlockDriverCompletionFunc
*cb
,
82 static void coroutine_fn
bdrv_co_do_rw(void *opaque
);
83 static int coroutine_fn
bdrv_co_do_write_zeroes(BlockDriverState
*bs
,
84 int64_t sector_num
, int nb_sectors
);
86 static bool bdrv_exceed_bps_limits(BlockDriverState
*bs
, int nb_sectors
,
87 bool is_write
, double elapsed_time
, uint64_t *wait
);
88 static bool bdrv_exceed_iops_limits(BlockDriverState
*bs
, bool is_write
,
89 double elapsed_time
, uint64_t *wait
);
90 static bool bdrv_exceed_io_limits(BlockDriverState
*bs
, int nb_sectors
,
91 bool is_write
, int64_t *wait
);
93 static QTAILQ_HEAD(, BlockDriverState
) bdrv_states
=
94 QTAILQ_HEAD_INITIALIZER(bdrv_states
);
96 static QLIST_HEAD(, BlockDriver
) bdrv_drivers
=
97 QLIST_HEAD_INITIALIZER(bdrv_drivers
);
99 /* The device to use for VM snapshots */
100 static BlockDriverState
*bs_snapshots
;
102 /* If non-zero, use only whitelisted block drivers */
103 static int use_bdrv_whitelist
;
/* Return non-zero if @filename starts with a DOS drive-letter prefix
 * such as "c:" (one ASCII letter followed by a colon). */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}
113 int is_windows_drive(const char *filename
)
115 if (is_windows_drive_prefix(filename
) &&
118 if (strstart(filename
, "\\\\.\\", NULL
) ||
119 strstart(filename
, "//./", NULL
))
125 /* throttling disk I/O limits */
126 void bdrv_io_limits_disable(BlockDriverState
*bs
)
128 bs
->io_limits_enabled
= false;
130 while (qemu_co_queue_next(&bs
->throttled_reqs
));
132 if (bs
->block_timer
) {
133 qemu_del_timer(bs
->block_timer
);
134 qemu_free_timer(bs
->block_timer
);
135 bs
->block_timer
= NULL
;
141 memset(&bs
->io_base
, 0, sizeof(bs
->io_base
));
144 static void bdrv_block_timer(void *opaque
)
146 BlockDriverState
*bs
= opaque
;
148 qemu_co_queue_next(&bs
->throttled_reqs
);
151 void bdrv_io_limits_enable(BlockDriverState
*bs
)
153 qemu_co_queue_init(&bs
->throttled_reqs
);
154 bs
->block_timer
= qemu_new_timer_ns(vm_clock
, bdrv_block_timer
, bs
);
155 bs
->slice_time
= 5 * BLOCK_IO_SLICE_TIME
;
156 bs
->slice_start
= qemu_get_clock_ns(vm_clock
);
157 bs
->slice_end
= bs
->slice_start
+ bs
->slice_time
;
158 memset(&bs
->io_base
, 0, sizeof(bs
->io_base
));
159 bs
->io_limits_enabled
= true;
162 bool bdrv_io_limits_enabled(BlockDriverState
*bs
)
164 BlockIOLimit
*io_limits
= &bs
->io_limits
;
165 return io_limits
->bps
[BLOCK_IO_LIMIT_READ
]
166 || io_limits
->bps
[BLOCK_IO_LIMIT_WRITE
]
167 || io_limits
->bps
[BLOCK_IO_LIMIT_TOTAL
]
168 || io_limits
->iops
[BLOCK_IO_LIMIT_READ
]
169 || io_limits
->iops
[BLOCK_IO_LIMIT_WRITE
]
170 || io_limits
->iops
[BLOCK_IO_LIMIT_TOTAL
];
173 static void bdrv_io_limits_intercept(BlockDriverState
*bs
,
174 bool is_write
, int nb_sectors
)
176 int64_t wait_time
= -1;
178 if (!qemu_co_queue_empty(&bs
->throttled_reqs
)) {
179 qemu_co_queue_wait(&bs
->throttled_reqs
);
182 /* In fact, we hope to keep each request's timing, in FIFO mode. The next
183 * throttled requests will not be dequeued until the current request is
184 * allowed to be serviced. So if the current request still exceeds the
185 * limits, it will be inserted to the head. All requests followed it will
186 * be still in throttled_reqs queue.
189 while (bdrv_exceed_io_limits(bs
, nb_sectors
, is_write
, &wait_time
)) {
190 qemu_mod_timer(bs
->block_timer
,
191 wait_time
+ qemu_get_clock_ns(vm_clock
));
192 qemu_co_queue_wait_insert_head(&bs
->throttled_reqs
);
195 qemu_co_queue_next(&bs
->throttled_reqs
);
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    /* Drive letters look like one-letter protocols; rule them out first. */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    /* a ':' before any path separator marks a protocol prefix */
    return *p == ':';
}
/* Return non-zero if @path is absolute (on Windows this includes drive
 * letters and backslash-rooted paths). */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path.
   NOTE(review): reconstructed from a garbled dump — verify the _WIN32
   backslash handling against upstream QEMU cutils/block.c. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* skip a "protocol:" prefix of base_path, if any */
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
273 void bdrv_get_full_backing_filename(BlockDriverState
*bs
, char *dest
, size_t sz
)
275 if (bs
->backing_file
[0] == '\0' || path_has_protocol(bs
->backing_file
)) {
276 pstrcpy(dest
, sz
, bs
->backing_file
);
278 path_combine(dest
, sz
, bs
->filename
, bs
->backing_file
);
282 void bdrv_register(BlockDriver
*bdrv
)
284 /* Block drivers without coroutine functions need emulation */
285 if (!bdrv
->bdrv_co_readv
) {
286 bdrv
->bdrv_co_readv
= bdrv_co_readv_em
;
287 bdrv
->bdrv_co_writev
= bdrv_co_writev_em
;
289 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
290 * the block driver lacks aio we need to emulate that too.
292 if (!bdrv
->bdrv_aio_readv
) {
293 /* add AIO emulation layer */
294 bdrv
->bdrv_aio_readv
= bdrv_aio_readv_em
;
295 bdrv
->bdrv_aio_writev
= bdrv_aio_writev_em
;
299 QLIST_INSERT_HEAD(&bdrv_drivers
, bdrv
, list
);
302 /* create a new block device (by default it is empty) */
303 BlockDriverState
*bdrv_new(const char *device_name
)
305 BlockDriverState
*bs
;
307 bs
= g_malloc0(sizeof(BlockDriverState
));
308 pstrcpy(bs
->device_name
, sizeof(bs
->device_name
), device_name
);
309 if (device_name
[0] != '\0') {
310 QTAILQ_INSERT_TAIL(&bdrv_states
, bs
, list
);
312 bdrv_iostatus_disable(bs
);
316 BlockDriver
*bdrv_find_format(const char *format_name
)
319 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
320 if (!strcmp(drv1
->format_name
, format_name
)) {
327 static int bdrv_is_whitelisted(BlockDriver
*drv
)
329 static const char *whitelist
[] = {
330 CONFIG_BDRV_WHITELIST
335 return 1; /* no whitelist, anything goes */
337 for (p
= whitelist
; *p
; p
++) {
338 if (!strcmp(drv
->format_name
, *p
)) {
345 BlockDriver
*bdrv_find_whitelisted_format(const char *format_name
)
347 BlockDriver
*drv
= bdrv_find_format(format_name
);
348 return drv
&& bdrv_is_whitelisted(drv
) ? drv
: NULL
;
351 typedef struct CreateCo
{
354 QEMUOptionParameter
*options
;
358 static void coroutine_fn
bdrv_create_co_entry(void *opaque
)
360 CreateCo
*cco
= opaque
;
363 cco
->ret
= cco
->drv
->bdrv_create(cco
->filename
, cco
->options
);
366 int bdrv_create(BlockDriver
*drv
, const char* filename
,
367 QEMUOptionParameter
*options
)
374 .filename
= g_strdup(filename
),
379 if (!drv
->bdrv_create
) {
383 if (qemu_in_coroutine()) {
384 /* Fast-path if already in coroutine context */
385 bdrv_create_co_entry(&cco
);
387 co
= qemu_coroutine_create(bdrv_create_co_entry
);
388 qemu_coroutine_enter(co
, &cco
);
389 while (cco
.ret
== NOT_DONE
) {
395 g_free(cco
.filename
);
400 int bdrv_create_file(const char* filename
, QEMUOptionParameter
*options
)
404 drv
= bdrv_find_protocol(filename
);
409 return bdrv_create(drv
, filename
, options
);
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0 || close(fd)) {
        return -errno;
    }
    return 0;
#endif
}
444 * Detect host devices. By convention, /dev/cdrom[N] is always
445 * recognized as a host CDROM.
447 static BlockDriver
*find_hdev_driver(const char *filename
)
449 int score_max
= 0, score
;
450 BlockDriver
*drv
= NULL
, *d
;
452 QLIST_FOREACH(d
, &bdrv_drivers
, list
) {
453 if (d
->bdrv_probe_device
) {
454 score
= d
->bdrv_probe_device(filename
);
455 if (score
> score_max
) {
465 BlockDriver
*bdrv_find_protocol(const char *filename
)
472 /* TODO Drivers without bdrv_file_open must be specified explicitly */
475 * XXX(hch): we really should not let host device detection
476 * override an explicit protocol specification, but moving this
477 * later breaks access to device names with colons in them.
478 * Thanks to the brain-dead persistent naming schemes on udev-
479 * based Linux systems those actually are quite common.
481 drv1
= find_hdev_driver(filename
);
486 if (!path_has_protocol(filename
)) {
487 return bdrv_find_format("file");
489 p
= strchr(filename
, ':');
492 if (len
> sizeof(protocol
) - 1)
493 len
= sizeof(protocol
) - 1;
494 memcpy(protocol
, filename
, len
);
495 protocol
[len
] = '\0';
496 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
497 if (drv1
->protocol_name
&&
498 !strcmp(drv1
->protocol_name
, protocol
)) {
505 static int find_image_format(const char *filename
, BlockDriver
**pdrv
)
507 int ret
, score
, score_max
;
508 BlockDriver
*drv1
, *drv
;
510 BlockDriverState
*bs
;
512 ret
= bdrv_file_open(&bs
, filename
, 0);
518 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
519 if (bs
->sg
|| !bdrv_is_inserted(bs
)) {
521 drv
= bdrv_find_format("raw");
529 ret
= bdrv_pread(bs
, 0, buf
, sizeof(buf
));
538 QLIST_FOREACH(drv1
, &bdrv_drivers
, list
) {
539 if (drv1
->bdrv_probe
) {
540 score
= drv1
->bdrv_probe(buf
, ret
, filename
);
541 if (score
> score_max
) {
555 * Set the current 'total_sectors' value
557 static int refresh_total_sectors(BlockDriverState
*bs
, int64_t hint
)
559 BlockDriver
*drv
= bs
->drv
;
561 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
565 /* query actual device if possible, otherwise just trust the hint */
566 if (drv
->bdrv_getlength
) {
567 int64_t length
= drv
->bdrv_getlength(bs
);
571 hint
= length
>> BDRV_SECTOR_BITS
;
574 bs
->total_sectors
= hint
;
579 * Set open flags for a given cache mode
581 * Return 0 on success, -1 if the cache mode was invalid.
583 int bdrv_parse_cache_flags(const char *mode
, int *flags
)
585 *flags
&= ~BDRV_O_CACHE_MASK
;
587 if (!strcmp(mode
, "off") || !strcmp(mode
, "none")) {
588 *flags
|= BDRV_O_NOCACHE
| BDRV_O_CACHE_WB
;
589 } else if (!strcmp(mode
, "directsync")) {
590 *flags
|= BDRV_O_NOCACHE
;
591 } else if (!strcmp(mode
, "writeback")) {
592 *flags
|= BDRV_O_CACHE_WB
;
593 } else if (!strcmp(mode
, "unsafe")) {
594 *flags
|= BDRV_O_CACHE_WB
;
595 *flags
|= BDRV_O_NO_FLUSH
;
596 } else if (!strcmp(mode
, "writethrough")) {
597 /* this is the default */
606 * The copy-on-read flag is actually a reference count so multiple users may
607 * use the feature without worrying about clobbering its previous state.
608 * Copy-on-read stays enabled until all users have called to disable it.
610 void bdrv_enable_copy_on_read(BlockDriverState
*bs
)
615 void bdrv_disable_copy_on_read(BlockDriverState
*bs
)
617 assert(bs
->copy_on_read
> 0);
622 * Common part for opening disk images and files
624 static int bdrv_open_common(BlockDriverState
*bs
, const char *filename
,
625 int flags
, BlockDriver
*drv
)
630 assert(bs
->file
== NULL
);
632 trace_bdrv_open_common(bs
, filename
, flags
, drv
->format_name
);
634 bs
->open_flags
= flags
;
635 bs
->buffer_alignment
= 512;
637 assert(bs
->copy_on_read
== 0); /* bdrv_new() and bdrv_close() make it so */
638 if ((flags
& BDRV_O_RDWR
) && (flags
& BDRV_O_COPY_ON_READ
)) {
639 bdrv_enable_copy_on_read(bs
);
642 pstrcpy(bs
->filename
, sizeof(bs
->filename
), filename
);
644 if (use_bdrv_whitelist
&& !bdrv_is_whitelisted(drv
)) {
649 bs
->opaque
= g_malloc0(drv
->instance_size
);
651 bs
->enable_write_cache
= !!(flags
& BDRV_O_CACHE_WB
);
654 * Clear flags that are internal to the block layer before opening the
657 open_flags
= flags
& ~(BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
660 * Snapshots should be writable.
662 if (bs
->is_temporary
) {
663 open_flags
|= BDRV_O_RDWR
;
666 bs
->keep_read_only
= bs
->read_only
= !(open_flags
& BDRV_O_RDWR
);
668 /* Open the image, either directly or using a protocol */
669 if (drv
->bdrv_file_open
) {
670 ret
= drv
->bdrv_file_open(bs
, filename
, open_flags
);
672 ret
= bdrv_file_open(&bs
->file
, filename
, open_flags
);
674 ret
= drv
->bdrv_open(bs
, open_flags
);
682 ret
= refresh_total_sectors(bs
, bs
->total_sectors
);
688 if (bs
->is_temporary
) {
696 bdrv_delete(bs
->file
);
706 * Opens a file using a protocol (file, host_device, nbd, ...)
708 int bdrv_file_open(BlockDriverState
**pbs
, const char *filename
, int flags
)
710 BlockDriverState
*bs
;
714 drv
= bdrv_find_protocol(filename
);
720 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
731 * Opens a disk image (raw, qcow2, vmdk, ...)
733 int bdrv_open(BlockDriverState
*bs
, const char *filename
, int flags
,
737 char tmp_filename
[PATH_MAX
];
739 if (flags
& BDRV_O_SNAPSHOT
) {
740 BlockDriverState
*bs1
;
743 BlockDriver
*bdrv_qcow2
;
744 QEMUOptionParameter
*options
;
745 char backing_filename
[PATH_MAX
];
747 /* if snapshot, we create a temporary backing file and open it
748 instead of opening 'filename' directly */
750 /* if there is a backing file, use it */
752 ret
= bdrv_open(bs1
, filename
, 0, drv
);
757 total_size
= bdrv_getlength(bs1
) & BDRV_SECTOR_MASK
;
759 if (bs1
->drv
&& bs1
->drv
->protocol_name
)
764 ret
= get_tmp_filename(tmp_filename
, sizeof(tmp_filename
));
769 /* Real path is meaningless for protocols */
771 snprintf(backing_filename
, sizeof(backing_filename
),
773 else if (!realpath(filename
, backing_filename
))
776 bdrv_qcow2
= bdrv_find_format("qcow2");
777 options
= parse_option_parameters("", bdrv_qcow2
->create_options
, NULL
);
779 set_option_parameter_int(options
, BLOCK_OPT_SIZE
, total_size
);
780 set_option_parameter(options
, BLOCK_OPT_BACKING_FILE
, backing_filename
);
782 set_option_parameter(options
, BLOCK_OPT_BACKING_FMT
,
786 ret
= bdrv_create(bdrv_qcow2
, tmp_filename
, options
);
787 free_option_parameters(options
);
792 filename
= tmp_filename
;
794 bs
->is_temporary
= 1;
797 /* Find the right image format driver */
799 ret
= find_image_format(filename
, &drv
);
803 goto unlink_and_fail
;
807 ret
= bdrv_open_common(bs
, filename
, flags
, drv
);
809 goto unlink_and_fail
;
812 /* If there is a backing file, use it */
813 if ((flags
& BDRV_O_NO_BACKING
) == 0 && bs
->backing_file
[0] != '\0') {
814 char backing_filename
[PATH_MAX
];
816 BlockDriver
*back_drv
= NULL
;
818 bs
->backing_hd
= bdrv_new("");
819 bdrv_get_full_backing_filename(bs
, backing_filename
,
820 sizeof(backing_filename
));
822 if (bs
->backing_format
[0] != '\0') {
823 back_drv
= bdrv_find_format(bs
->backing_format
);
826 /* backing files always opened read-only */
828 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
830 ret
= bdrv_open(bs
->backing_hd
, backing_filename
, back_flags
, back_drv
);
835 if (bs
->is_temporary
) {
836 bs
->backing_hd
->keep_read_only
= !(flags
& BDRV_O_RDWR
);
838 /* base image inherits from "parent" */
839 bs
->backing_hd
->keep_read_only
= bs
->keep_read_only
;
843 if (!bdrv_key_required(bs
)) {
844 bdrv_dev_change_media_cb(bs
, true);
847 /* throttling disk I/O limits */
848 if (bs
->io_limits_enabled
) {
849 bdrv_io_limits_enable(bs
);
855 if (bs
->is_temporary
) {
861 void bdrv_close(BlockDriverState
*bs
)
866 block_job_cancel_sync(bs
->job
);
870 if (bs
== bs_snapshots
) {
873 if (bs
->backing_hd
) {
874 bdrv_delete(bs
->backing_hd
);
875 bs
->backing_hd
= NULL
;
877 bs
->drv
->bdrv_close(bs
);
880 if (bs
->is_temporary
) {
881 unlink(bs
->filename
);
886 bs
->copy_on_read
= 0;
887 bs
->backing_file
[0] = '\0';
888 bs
->backing_format
[0] = '\0';
889 bs
->total_sectors
= 0;
895 if (bs
->file
!= NULL
) {
896 bdrv_delete(bs
->file
);
900 bdrv_dev_change_media_cb(bs
, false);
903 /*throttling disk I/O limits*/
904 if (bs
->io_limits_enabled
) {
905 bdrv_io_limits_disable(bs
);
909 void bdrv_close_all(void)
911 BlockDriverState
*bs
;
913 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
919 * Wait for pending requests to complete across all BlockDriverStates
921 * This function does not flush data to disk, use bdrv_flush_all() for that
922 * after calling this function.
924 * Note that completion of an asynchronous I/O operation can trigger any
925 * number of other I/O operations on other devices---for example a coroutine
926 * can be arbitrarily complex and a constant flow of I/O can come until the
927 * coroutine is complete. Because of this, it is not possible to have a
928 * function to drain a single device's I/O queue.
930 void bdrv_drain_all(void)
932 BlockDriverState
*bs
;
936 busy
= qemu_aio_wait();
938 /* FIXME: We do not have timer support here, so this is effectively
941 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
942 if (!qemu_co_queue_empty(&bs
->throttled_reqs
)) {
943 qemu_co_queue_restart_all(&bs
->throttled_reqs
);
949 /* If requests are still pending there is a bug somewhere */
950 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
951 assert(QLIST_EMPTY(&bs
->tracked_requests
));
952 assert(qemu_co_queue_empty(&bs
->throttled_reqs
));
956 /* make a BlockDriverState anonymous by removing from bdrv_state list.
957 Also, NULL terminate the device_name to prevent double remove */
958 void bdrv_make_anon(BlockDriverState
*bs
)
960 if (bs
->device_name
[0] != '\0') {
961 QTAILQ_REMOVE(&bdrv_states
, bs
, list
);
963 bs
->device_name
[0] = '\0';
966 static void bdrv_rebind(BlockDriverState
*bs
)
968 if (bs
->drv
&& bs
->drv
->bdrv_rebind
) {
969 bs
->drv
->bdrv_rebind(bs
);
974 * Add new bs contents at the top of an image chain while the chain is
975 * live, while keeping required fields on the top layer.
977 * This will modify the BlockDriverState fields, and swap contents
978 * between bs_new and bs_top. Both bs_new and bs_top are modified.
980 * bs_new is required to be anonymous.
982 * This function does not create any image files.
984 void bdrv_append(BlockDriverState
*bs_new
, BlockDriverState
*bs_top
)
986 BlockDriverState tmp
;
988 /* bs_new must be anonymous */
989 assert(bs_new
->device_name
[0] == '\0');
993 /* there are some fields that need to stay on the top layer: */
994 tmp
.open_flags
= bs_top
->open_flags
;
997 tmp
.dev_ops
= bs_top
->dev_ops
;
998 tmp
.dev_opaque
= bs_top
->dev_opaque
;
999 tmp
.dev
= bs_top
->dev
;
1000 tmp
.buffer_alignment
= bs_top
->buffer_alignment
;
1001 tmp
.copy_on_read
= bs_top
->copy_on_read
;
1003 tmp
.enable_write_cache
= bs_top
->enable_write_cache
;
1005 /* i/o timing parameters */
1006 tmp
.slice_time
= bs_top
->slice_time
;
1007 tmp
.slice_start
= bs_top
->slice_start
;
1008 tmp
.slice_end
= bs_top
->slice_end
;
1009 tmp
.io_limits
= bs_top
->io_limits
;
1010 tmp
.io_base
= bs_top
->io_base
;
1011 tmp
.throttled_reqs
= bs_top
->throttled_reqs
;
1012 tmp
.block_timer
= bs_top
->block_timer
;
1013 tmp
.io_limits_enabled
= bs_top
->io_limits_enabled
;
1016 tmp
.cyls
= bs_top
->cyls
;
1017 tmp
.heads
= bs_top
->heads
;
1018 tmp
.secs
= bs_top
->secs
;
1019 tmp
.translation
= bs_top
->translation
;
1022 tmp
.on_read_error
= bs_top
->on_read_error
;
1023 tmp
.on_write_error
= bs_top
->on_write_error
;
1026 tmp
.iostatus_enabled
= bs_top
->iostatus_enabled
;
1027 tmp
.iostatus
= bs_top
->iostatus
;
1029 /* keep the same entry in bdrv_states */
1030 pstrcpy(tmp
.device_name
, sizeof(tmp
.device_name
), bs_top
->device_name
);
1031 tmp
.list
= bs_top
->list
;
1033 /* The contents of 'tmp' will become bs_top, as we are
1034 * swapping bs_new and bs_top contents. */
1035 tmp
.backing_hd
= bs_new
;
1036 pstrcpy(tmp
.backing_file
, sizeof(tmp
.backing_file
), bs_top
->filename
);
1037 bdrv_get_format(bs_top
, tmp
.backing_format
, sizeof(tmp
.backing_format
));
1039 /* swap contents of the fixed new bs and the current top */
1043 /* device_name[] was carried over from the old bs_top. bs_new
1044 * shouldn't be in bdrv_states, so we need to make device_name[]
1045 * reflect the anonymity of bs_new
1047 bs_new
->device_name
[0] = '\0';
1049 /* clear the copied fields in the new backing file */
1050 bdrv_detach_dev(bs_new
, bs_new
->dev
);
1052 qemu_co_queue_init(&bs_new
->throttled_reqs
);
1053 memset(&bs_new
->io_base
, 0, sizeof(bs_new
->io_base
));
1054 memset(&bs_new
->io_limits
, 0, sizeof(bs_new
->io_limits
));
1055 bdrv_iostatus_disable(bs_new
);
1057 /* we don't use bdrv_io_limits_disable() for this, because we don't want
1058 * to affect or delete the block_timer, as it has been moved to bs_top */
1059 bs_new
->io_limits_enabled
= false;
1060 bs_new
->block_timer
= NULL
;
1061 bs_new
->slice_time
= 0;
1062 bs_new
->slice_start
= 0;
1063 bs_new
->slice_end
= 0;
1065 bdrv_rebind(bs_new
);
1066 bdrv_rebind(bs_top
);
1069 void bdrv_delete(BlockDriverState
*bs
)
1073 assert(!bs
->in_use
);
1075 /* remove from list, if necessary */
1080 assert(bs
!= bs_snapshots
);
1084 int bdrv_attach_dev(BlockDriverState
*bs
, void *dev
)
1085 /* TODO change to DeviceState *dev when all users are qdevified */
1091 bdrv_iostatus_reset(bs
);
1095 /* TODO qdevified devices don't use this, remove when devices are qdevified */
1096 void bdrv_attach_dev_nofail(BlockDriverState
*bs
, void *dev
)
1098 if (bdrv_attach_dev(bs
, dev
) < 0) {
1103 void bdrv_detach_dev(BlockDriverState
*bs
, void *dev
)
1104 /* TODO change to DeviceState *dev when all users are qdevified */
1106 assert(bs
->dev
== dev
);
1109 bs
->dev_opaque
= NULL
;
1110 bs
->buffer_alignment
= 512;
1113 /* TODO change to return DeviceState * when all users are qdevified */
1114 void *bdrv_get_attached_dev(BlockDriverState
*bs
)
1119 void bdrv_set_dev_ops(BlockDriverState
*bs
, const BlockDevOps
*ops
,
1123 bs
->dev_opaque
= opaque
;
1124 if (bdrv_dev_has_removable_media(bs
) && bs
== bs_snapshots
) {
1125 bs_snapshots
= NULL
;
1129 void bdrv_emit_qmp_error_event(const BlockDriverState
*bdrv
,
1130 BlockQMPEventAction action
, int is_read
)
1133 const char *action_str
;
1136 case BDRV_ACTION_REPORT
:
1137 action_str
= "report";
1139 case BDRV_ACTION_IGNORE
:
1140 action_str
= "ignore";
1142 case BDRV_ACTION_STOP
:
1143 action_str
= "stop";
1149 data
= qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1152 is_read
? "read" : "write");
1153 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR
, data
);
1155 qobject_decref(data
);
1158 static void bdrv_emit_qmp_eject_event(BlockDriverState
*bs
, bool ejected
)
1162 data
= qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
1163 bdrv_get_device_name(bs
), ejected
);
1164 monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED
, data
);
1166 qobject_decref(data
);
1169 static void bdrv_dev_change_media_cb(BlockDriverState
*bs
, bool load
)
1171 if (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
) {
1172 bool tray_was_closed
= !bdrv_dev_is_tray_open(bs
);
1173 bs
->dev_ops
->change_media_cb(bs
->dev_opaque
, load
);
1174 if (tray_was_closed
) {
1176 bdrv_emit_qmp_eject_event(bs
, true);
1180 bdrv_emit_qmp_eject_event(bs
, false);
1185 bool bdrv_dev_has_removable_media(BlockDriverState
*bs
)
1187 return !bs
->dev
|| (bs
->dev_ops
&& bs
->dev_ops
->change_media_cb
);
1190 void bdrv_dev_eject_request(BlockDriverState
*bs
, bool force
)
1192 if (bs
->dev_ops
&& bs
->dev_ops
->eject_request_cb
) {
1193 bs
->dev_ops
->eject_request_cb(bs
->dev_opaque
, force
);
1197 bool bdrv_dev_is_tray_open(BlockDriverState
*bs
)
1199 if (bs
->dev_ops
&& bs
->dev_ops
->is_tray_open
) {
1200 return bs
->dev_ops
->is_tray_open(bs
->dev_opaque
);
1205 static void bdrv_dev_resize_cb(BlockDriverState
*bs
)
1207 if (bs
->dev_ops
&& bs
->dev_ops
->resize_cb
) {
1208 bs
->dev_ops
->resize_cb(bs
->dev_opaque
);
1212 bool bdrv_dev_is_medium_locked(BlockDriverState
*bs
)
1214 if (bs
->dev_ops
&& bs
->dev_ops
->is_medium_locked
) {
1215 return bs
->dev_ops
->is_medium_locked(bs
->dev_opaque
);
1221 * Run consistency checks on an image
1223 * Returns 0 if the check could be completed (it doesn't mean that the image is
1224 * free of errors) or -errno when an internal error occurred. The results of the
1225 * check are stored in res.
1227 int bdrv_check(BlockDriverState
*bs
, BdrvCheckResult
*res
, BdrvCheckMode fix
)
1229 if (bs
->drv
->bdrv_check
== NULL
) {
1233 memset(res
, 0, sizeof(*res
));
1234 return bs
->drv
->bdrv_check(bs
, res
, fix
);
1237 #define COMMIT_BUF_SECTORS 2048
1239 /* commit COW file into the raw image */
1240 int bdrv_commit(BlockDriverState
*bs
)
1242 BlockDriver
*drv
= bs
->drv
;
1243 BlockDriver
*backing_drv
;
1244 int64_t sector
, total_sectors
;
1245 int n
, ro
, open_flags
;
1246 int ret
= 0, rw_ret
= 0;
1248 char filename
[1024];
1249 BlockDriverState
*bs_rw
, *bs_ro
;
1254 if (!bs
->backing_hd
) {
1258 if (bs
->backing_hd
->keep_read_only
) {
1262 if (bdrv_in_use(bs
) || bdrv_in_use(bs
->backing_hd
)) {
1266 backing_drv
= bs
->backing_hd
->drv
;
1267 ro
= bs
->backing_hd
->read_only
;
1268 strncpy(filename
, bs
->backing_hd
->filename
, sizeof(filename
));
1269 open_flags
= bs
->backing_hd
->open_flags
;
1273 bdrv_delete(bs
->backing_hd
);
1274 bs
->backing_hd
= NULL
;
1275 bs_rw
= bdrv_new("");
1276 rw_ret
= bdrv_open(bs_rw
, filename
, open_flags
| BDRV_O_RDWR
,
1280 /* try to re-open read-only */
1281 bs_ro
= bdrv_new("");
1282 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
1286 /* drive not functional anymore */
1290 bs
->backing_hd
= bs_ro
;
1293 bs
->backing_hd
= bs_rw
;
1296 total_sectors
= bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
;
1297 buf
= g_malloc(COMMIT_BUF_SECTORS
* BDRV_SECTOR_SIZE
);
1299 for (sector
= 0; sector
< total_sectors
; sector
+= n
) {
1300 if (bdrv_is_allocated(bs
, sector
, COMMIT_BUF_SECTORS
, &n
)) {
1302 if (bdrv_read(bs
, sector
, buf
, n
) != 0) {
1307 if (bdrv_write(bs
->backing_hd
, sector
, buf
, n
) != 0) {
1314 if (drv
->bdrv_make_empty
) {
1315 ret
= drv
->bdrv_make_empty(bs
);
1320 * Make sure all data we wrote to the backing device is actually
1324 bdrv_flush(bs
->backing_hd
);
1331 bdrv_delete(bs
->backing_hd
);
1332 bs
->backing_hd
= NULL
;
1333 bs_ro
= bdrv_new("");
1334 ret
= bdrv_open(bs_ro
, filename
, open_flags
& ~BDRV_O_RDWR
,
1338 /* drive not functional anymore */
1342 bs
->backing_hd
= bs_ro
;
1343 bs
->backing_hd
->keep_read_only
= 0;
1349 int bdrv_commit_all(void)
1351 BlockDriverState
*bs
;
1353 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
1354 int ret
= bdrv_commit(bs
);
1362 struct BdrvTrackedRequest
{
1363 BlockDriverState
*bs
;
1367 QLIST_ENTRY(BdrvTrackedRequest
) list
;
1368 Coroutine
*co
; /* owner, used for deadlock detection */
1369 CoQueue wait_queue
; /* coroutines blocked on this request */
1373 * Remove an active request from the tracked requests list
1375 * This function should be called when a tracked request is completing.
1377 static void tracked_request_end(BdrvTrackedRequest
*req
)
1379 QLIST_REMOVE(req
, list
);
1380 qemu_co_queue_restart_all(&req
->wait_queue
);
1384 * Add an active request to the tracked requests list
1386 static void tracked_request_begin(BdrvTrackedRequest
*req
,
1387 BlockDriverState
*bs
,
1389 int nb_sectors
, bool is_write
)
1391 *req
= (BdrvTrackedRequest
){
1393 .sector_num
= sector_num
,
1394 .nb_sectors
= nb_sectors
,
1395 .is_write
= is_write
,
1396 .co
= qemu_coroutine_self(),
1399 qemu_co_queue_init(&req
->wait_queue
);
1401 QLIST_INSERT_HEAD(&bs
->tracked_requests
, req
, list
);
1405 * Round a region to cluster boundaries
1407 static void round_to_clusters(BlockDriverState
*bs
,
1408 int64_t sector_num
, int nb_sectors
,
1409 int64_t *cluster_sector_num
,
1410 int *cluster_nb_sectors
)
1412 BlockDriverInfo bdi
;
1414 if (bdrv_get_info(bs
, &bdi
) < 0 || bdi
.cluster_size
== 0) {
1415 *cluster_sector_num
= sector_num
;
1416 *cluster_nb_sectors
= nb_sectors
;
1418 int64_t c
= bdi
.cluster_size
/ BDRV_SECTOR_SIZE
;
1419 *cluster_sector_num
= QEMU_ALIGN_DOWN(sector_num
, c
);
1420 *cluster_nb_sectors
= QEMU_ALIGN_UP(sector_num
- *cluster_sector_num
+
1425 static bool tracked_request_overlaps(BdrvTrackedRequest
*req
,
1426 int64_t sector_num
, int nb_sectors
) {
1428 if (sector_num
>= req
->sector_num
+ req
->nb_sectors
) {
1432 if (req
->sector_num
>= sector_num
+ nb_sectors
) {
1438 static void coroutine_fn
wait_for_overlapping_requests(BlockDriverState
*bs
,
1439 int64_t sector_num
, int nb_sectors
)
1441 BdrvTrackedRequest
*req
;
1442 int64_t cluster_sector_num
;
1443 int cluster_nb_sectors
;
1446 /* If we touch the same cluster it counts as an overlap. This guarantees
1447 * that allocating writes will be serialized and not race with each other
1448 * for the same cluster. For example, in copy-on-read it ensures that the
1449 * CoR read and write operations are atomic and guest writes cannot
1450 * interleave between them.
1452 round_to_clusters(bs
, sector_num
, nb_sectors
,
1453 &cluster_sector_num
, &cluster_nb_sectors
);
1457 QLIST_FOREACH(req
, &bs
->tracked_requests
, list
) {
1458 if (tracked_request_overlaps(req
, cluster_sector_num
,
1459 cluster_nb_sectors
)) {
1460 /* Hitting this means there was a reentrant request, for
1461 * example, a block driver issuing nested requests. This must
1462 * never happen since it means deadlock.
1464 assert(qemu_coroutine_self() != req
->co
);
1466 qemu_co_queue_wait(&req
->wait_queue
);
1477 * -EINVAL - backing format specified, but no file
1478 * -ENOSPC - can't update the backing file because no space is left in the
1480 * -ENOTSUP - format driver doesn't support changing the backing file
1482 int bdrv_change_backing_file(BlockDriverState
*bs
,
1483 const char *backing_file
, const char *backing_fmt
)
1485 BlockDriver
*drv
= bs
->drv
;
1488 /* Backing file format doesn't make sense without a backing file */
1489 if (backing_fmt
&& !backing_file
) {
1493 if (drv
->bdrv_change_backing_file
!= NULL
) {
1494 ret
= drv
->bdrv_change_backing_file(bs
, backing_file
, backing_fmt
);
1500 pstrcpy(bs
->backing_file
, sizeof(bs
->backing_file
), backing_file
?: "");
1501 pstrcpy(bs
->backing_format
, sizeof(bs
->backing_format
), backing_fmt
?: "");
1506 static int bdrv_check_byte_request(BlockDriverState
*bs
, int64_t offset
,
1511 if (!bdrv_is_inserted(bs
))
1517 len
= bdrv_getlength(bs
);
1522 if ((offset
> len
) || (len
- offset
< size
))
1528 static int bdrv_check_request(BlockDriverState
*bs
, int64_t sector_num
,
1531 return bdrv_check_byte_request(bs
, sector_num
* BDRV_SECTOR_SIZE
,
1532 nb_sectors
* BDRV_SECTOR_SIZE
);
1535 typedef struct RwCo
{
1536 BlockDriverState
*bs
;
1544 static void coroutine_fn
bdrv_rw_co_entry(void *opaque
)
1546 RwCo
*rwco
= opaque
;
1548 if (!rwco
->is_write
) {
1549 rwco
->ret
= bdrv_co_do_readv(rwco
->bs
, rwco
->sector_num
,
1550 rwco
->nb_sectors
, rwco
->qiov
, 0);
1552 rwco
->ret
= bdrv_co_do_writev(rwco
->bs
, rwco
->sector_num
,
1553 rwco
->nb_sectors
, rwco
->qiov
, 0);
1558 * Process a synchronous request using coroutines
1560 static int bdrv_rw_co(BlockDriverState
*bs
, int64_t sector_num
, uint8_t *buf
,
1561 int nb_sectors
, bool is_write
)
1564 struct iovec iov
= {
1565 .iov_base
= (void *)buf
,
1566 .iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
,
1571 .sector_num
= sector_num
,
1572 .nb_sectors
= nb_sectors
,
1574 .is_write
= is_write
,
1578 qemu_iovec_init_external(&qiov
, &iov
, 1);
1581 * In sync call context, when the vcpu is blocked, this throttling timer
1582 * will not fire; so the I/O throttling function has to be disabled here
1583 * if it has been enabled.
1585 if (bs
->io_limits_enabled
) {
1586 fprintf(stderr
, "Disabling I/O throttling on '%s' due "
1587 "to synchronous I/O.\n", bdrv_get_device_name(bs
));
1588 bdrv_io_limits_disable(bs
);
1591 if (qemu_in_coroutine()) {
1592 /* Fast-path if already in coroutine context */
1593 bdrv_rw_co_entry(&rwco
);
1595 co
= qemu_coroutine_create(bdrv_rw_co_entry
);
1596 qemu_coroutine_enter(co
, &rwco
);
1597 while (rwco
.ret
== NOT_DONE
) {
1604 /* return < 0 if error. See bdrv_write() for the return codes */
1605 int bdrv_read(BlockDriverState
*bs
, int64_t sector_num
,
1606 uint8_t *buf
, int nb_sectors
)
1608 return bdrv_rw_co(bs
, sector_num
, buf
, nb_sectors
, false);
1611 #define BITS_PER_LONG (sizeof(unsigned long) * 8)
1613 static void set_dirty_bitmap(BlockDriverState
*bs
, int64_t sector_num
,
1614 int nb_sectors
, int dirty
)
1617 unsigned long val
, idx
, bit
;
1619 start
= sector_num
/ BDRV_SECTORS_PER_DIRTY_CHUNK
;
1620 end
= (sector_num
+ nb_sectors
- 1) / BDRV_SECTORS_PER_DIRTY_CHUNK
;
1622 for (; start
<= end
; start
++) {
1623 idx
= start
/ BITS_PER_LONG
;
1624 bit
= start
% BITS_PER_LONG
;
1625 val
= bs
->dirty_bitmap
[idx
];
1627 if (!(val
& (1UL << bit
))) {
1632 if (val
& (1UL << bit
)) {
1634 val
&= ~(1UL << bit
);
1637 bs
->dirty_bitmap
[idx
] = val
;
1641 /* Return < 0 if error. Important errors are:
1642 -EIO generic I/O error (may happen for all errors)
1643 -ENOMEDIUM No media inserted.
1644 -EINVAL Invalid sector number or nb_sectors
1645 -EACCES Trying to write a read-only device
1647 int bdrv_write(BlockDriverState
*bs
, int64_t sector_num
,
1648 const uint8_t *buf
, int nb_sectors
)
1650 return bdrv_rw_co(bs
, sector_num
, (uint8_t *)buf
, nb_sectors
, true);
1653 int bdrv_pread(BlockDriverState
*bs
, int64_t offset
,
1654 void *buf
, int count1
)
1656 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1657 int len
, nb_sectors
, count
;
1662 /* first read to align to sector start */
1663 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1666 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1668 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1670 memcpy(buf
, tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), len
);
1678 /* read the sectors "in place" */
1679 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1680 if (nb_sectors
> 0) {
1681 if ((ret
= bdrv_read(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1683 sector_num
+= nb_sectors
;
1684 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1689 /* add data from the last sector */
1691 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1693 memcpy(buf
, tmp_buf
, count
);
1698 int bdrv_pwrite(BlockDriverState
*bs
, int64_t offset
,
1699 const void *buf
, int count1
)
1701 uint8_t tmp_buf
[BDRV_SECTOR_SIZE
];
1702 int len
, nb_sectors
, count
;
1707 /* first write to align to sector start */
1708 len
= (BDRV_SECTOR_SIZE
- offset
) & (BDRV_SECTOR_SIZE
- 1);
1711 sector_num
= offset
>> BDRV_SECTOR_BITS
;
1713 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1715 memcpy(tmp_buf
+ (offset
& (BDRV_SECTOR_SIZE
- 1)), buf
, len
);
1716 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1725 /* write the sectors "in place" */
1726 nb_sectors
= count
>> BDRV_SECTOR_BITS
;
1727 if (nb_sectors
> 0) {
1728 if ((ret
= bdrv_write(bs
, sector_num
, buf
, nb_sectors
)) < 0)
1730 sector_num
+= nb_sectors
;
1731 len
= nb_sectors
<< BDRV_SECTOR_BITS
;
1736 /* add data from the last sector */
1738 if ((ret
= bdrv_read(bs
, sector_num
, tmp_buf
, 1)) < 0)
1740 memcpy(tmp_buf
, buf
, count
);
1741 if ((ret
= bdrv_write(bs
, sector_num
, tmp_buf
, 1)) < 0)
1748 * Writes to the file and ensures that no writes are reordered across this
1749 * request (acts as a barrier)
1751 * Returns 0 on success, -errno in error cases.
1753 int bdrv_pwrite_sync(BlockDriverState
*bs
, int64_t offset
,
1754 const void *buf
, int count
)
1758 ret
= bdrv_pwrite(bs
, offset
, buf
, count
);
1763 /* No flush needed for cache modes that already do it */
1764 if (bs
->enable_write_cache
) {
1771 static int coroutine_fn
bdrv_co_do_copy_on_readv(BlockDriverState
*bs
,
1772 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1774 /* Perform I/O through a temporary buffer so that users who scribble over
1775 * their read buffer while the operation is in progress do not end up
1776 * modifying the image file. This is critical for zero-copy guest I/O
1777 * where anything might happen inside guest memory.
1779 void *bounce_buffer
;
1781 BlockDriver
*drv
= bs
->drv
;
1783 QEMUIOVector bounce_qiov
;
1784 int64_t cluster_sector_num
;
1785 int cluster_nb_sectors
;
1789 /* Cover entire cluster so no additional backing file I/O is required when
1790 * allocating cluster in the image file.
1792 round_to_clusters(bs
, sector_num
, nb_sectors
,
1793 &cluster_sector_num
, &cluster_nb_sectors
);
1795 trace_bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
,
1796 cluster_sector_num
, cluster_nb_sectors
);
1798 iov
.iov_len
= cluster_nb_sectors
* BDRV_SECTOR_SIZE
;
1799 iov
.iov_base
= bounce_buffer
= qemu_blockalign(bs
, iov
.iov_len
);
1800 qemu_iovec_init_external(&bounce_qiov
, &iov
, 1);
1802 ret
= drv
->bdrv_co_readv(bs
, cluster_sector_num
, cluster_nb_sectors
,
1808 if (drv
->bdrv_co_write_zeroes
&&
1809 buffer_is_zero(bounce_buffer
, iov
.iov_len
)) {
1810 ret
= bdrv_co_do_write_zeroes(bs
, cluster_sector_num
,
1811 cluster_nb_sectors
);
1813 /* This does not change the data on the disk, it is not necessary
1814 * to flush even in cache=writethrough mode.
1816 ret
= drv
->bdrv_co_writev(bs
, cluster_sector_num
, cluster_nb_sectors
,
1821 /* It might be okay to ignore write errors for guest requests. If this
1822 * is a deliberate copy-on-read then we don't want to ignore the error.
1823 * Simply report it in all cases.
1828 skip_bytes
= (sector_num
- cluster_sector_num
) * BDRV_SECTOR_SIZE
;
1829 qemu_iovec_from_buffer(qiov
, bounce_buffer
+ skip_bytes
,
1830 nb_sectors
* BDRV_SECTOR_SIZE
);
1833 qemu_vfree(bounce_buffer
);
1838 * Handle a read request in coroutine context
1840 static int coroutine_fn
bdrv_co_do_readv(BlockDriverState
*bs
,
1841 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
1842 BdrvRequestFlags flags
)
1844 BlockDriver
*drv
= bs
->drv
;
1845 BdrvTrackedRequest req
;
1851 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1855 /* throttling disk read I/O */
1856 if (bs
->io_limits_enabled
) {
1857 bdrv_io_limits_intercept(bs
, false, nb_sectors
);
1860 if (bs
->copy_on_read
) {
1861 flags
|= BDRV_REQ_COPY_ON_READ
;
1863 if (flags
& BDRV_REQ_COPY_ON_READ
) {
1864 bs
->copy_on_read_in_flight
++;
1867 if (bs
->copy_on_read_in_flight
) {
1868 wait_for_overlapping_requests(bs
, sector_num
, nb_sectors
);
1871 tracked_request_begin(&req
, bs
, sector_num
, nb_sectors
, false);
1873 if (flags
& BDRV_REQ_COPY_ON_READ
) {
1876 ret
= bdrv_co_is_allocated(bs
, sector_num
, nb_sectors
, &pnum
);
1881 if (!ret
|| pnum
!= nb_sectors
) {
1882 ret
= bdrv_co_do_copy_on_readv(bs
, sector_num
, nb_sectors
, qiov
);
1887 ret
= drv
->bdrv_co_readv(bs
, sector_num
, nb_sectors
, qiov
);
1890 tracked_request_end(&req
);
1892 if (flags
& BDRV_REQ_COPY_ON_READ
) {
1893 bs
->copy_on_read_in_flight
--;
1899 int coroutine_fn
bdrv_co_readv(BlockDriverState
*bs
, int64_t sector_num
,
1900 int nb_sectors
, QEMUIOVector
*qiov
)
1902 trace_bdrv_co_readv(bs
, sector_num
, nb_sectors
);
1904 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
, 0);
1907 int coroutine_fn
bdrv_co_copy_on_readv(BlockDriverState
*bs
,
1908 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
)
1910 trace_bdrv_co_copy_on_readv(bs
, sector_num
, nb_sectors
);
1912 return bdrv_co_do_readv(bs
, sector_num
, nb_sectors
, qiov
,
1913 BDRV_REQ_COPY_ON_READ
);
1916 static int coroutine_fn
bdrv_co_do_write_zeroes(BlockDriverState
*bs
,
1917 int64_t sector_num
, int nb_sectors
)
1919 BlockDriver
*drv
= bs
->drv
;
1924 /* TODO Emulate only part of misaligned requests instead of letting block
1925 * drivers return -ENOTSUP and emulate everything */
1927 /* First try the efficient write zeroes operation */
1928 if (drv
->bdrv_co_write_zeroes
) {
1929 ret
= drv
->bdrv_co_write_zeroes(bs
, sector_num
, nb_sectors
);
1930 if (ret
!= -ENOTSUP
) {
1935 /* Fall back to bounce buffer if write zeroes is unsupported */
1936 iov
.iov_len
= nb_sectors
* BDRV_SECTOR_SIZE
;
1937 iov
.iov_base
= qemu_blockalign(bs
, iov
.iov_len
);
1938 memset(iov
.iov_base
, 0, iov
.iov_len
);
1939 qemu_iovec_init_external(&qiov
, &iov
, 1);
1941 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, &qiov
);
1943 qemu_vfree(iov
.iov_base
);
1948 * Handle a write request in coroutine context
1950 static int coroutine_fn
bdrv_co_do_writev(BlockDriverState
*bs
,
1951 int64_t sector_num
, int nb_sectors
, QEMUIOVector
*qiov
,
1952 BdrvRequestFlags flags
)
1954 BlockDriver
*drv
= bs
->drv
;
1955 BdrvTrackedRequest req
;
1961 if (bs
->read_only
) {
1964 if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
1968 /* throttling disk write I/O */
1969 if (bs
->io_limits_enabled
) {
1970 bdrv_io_limits_intercept(bs
, true, nb_sectors
);
1973 if (bs
->copy_on_read_in_flight
) {
1974 wait_for_overlapping_requests(bs
, sector_num
, nb_sectors
);
1977 tracked_request_begin(&req
, bs
, sector_num
, nb_sectors
, true);
1979 if (flags
& BDRV_REQ_ZERO_WRITE
) {
1980 ret
= bdrv_co_do_write_zeroes(bs
, sector_num
, nb_sectors
);
1982 ret
= drv
->bdrv_co_writev(bs
, sector_num
, nb_sectors
, qiov
);
1985 if (ret
== 0 && !bs
->enable_write_cache
) {
1986 ret
= bdrv_co_flush(bs
);
1989 if (bs
->dirty_bitmap
) {
1990 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
1993 if (bs
->wr_highest_sector
< sector_num
+ nb_sectors
- 1) {
1994 bs
->wr_highest_sector
= sector_num
+ nb_sectors
- 1;
1997 tracked_request_end(&req
);
2002 int coroutine_fn
bdrv_co_writev(BlockDriverState
*bs
, int64_t sector_num
,
2003 int nb_sectors
, QEMUIOVector
*qiov
)
2005 trace_bdrv_co_writev(bs
, sector_num
, nb_sectors
);
2007 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, qiov
, 0);
2010 int coroutine_fn
bdrv_co_write_zeroes(BlockDriverState
*bs
,
2011 int64_t sector_num
, int nb_sectors
)
2013 trace_bdrv_co_write_zeroes(bs
, sector_num
, nb_sectors
);
2015 return bdrv_co_do_writev(bs
, sector_num
, nb_sectors
, NULL
,
2016 BDRV_REQ_ZERO_WRITE
);
2020 * Truncate file to 'offset' bytes (needed only for file protocols)
2022 int bdrv_truncate(BlockDriverState
*bs
, int64_t offset
)
2024 BlockDriver
*drv
= bs
->drv
;
2028 if (!drv
->bdrv_truncate
)
2032 if (bdrv_in_use(bs
))
2034 ret
= drv
->bdrv_truncate(bs
, offset
);
2036 ret
= refresh_total_sectors(bs
, offset
>> BDRV_SECTOR_BITS
);
2037 bdrv_dev_resize_cb(bs
);
2043 * Length of a allocated file in bytes. Sparse files are counted by actual
2044 * allocated space. Return < 0 if error or unknown.
2046 int64_t bdrv_get_allocated_file_size(BlockDriverState
*bs
)
2048 BlockDriver
*drv
= bs
->drv
;
2052 if (drv
->bdrv_get_allocated_file_size
) {
2053 return drv
->bdrv_get_allocated_file_size(bs
);
2056 return bdrv_get_allocated_file_size(bs
->file
);
2062 * Length of a file in bytes. Return < 0 if error or unknown.
2064 int64_t bdrv_getlength(BlockDriverState
*bs
)
2066 BlockDriver
*drv
= bs
->drv
;
2070 if (bs
->growable
|| bdrv_dev_has_removable_media(bs
)) {
2071 if (drv
->bdrv_getlength
) {
2072 return drv
->bdrv_getlength(bs
);
2075 return bs
->total_sectors
* BDRV_SECTOR_SIZE
;
2078 /* return 0 as number of sectors if no device present or error */
2079 void bdrv_get_geometry(BlockDriverState
*bs
, uint64_t *nb_sectors_ptr
)
2082 length
= bdrv_getlength(bs
);
2086 length
= length
>> BDRV_SECTOR_BITS
;
2087 *nb_sectors_ptr
= length
;
2091 uint8_t boot_ind
; /* 0x80 - active */
2092 uint8_t head
; /* starting head */
2093 uint8_t sector
; /* starting sector */
2094 uint8_t cyl
; /* starting cylinder */
2095 uint8_t sys_ind
; /* What partition type */
2096 uint8_t end_head
; /* end head */
2097 uint8_t end_sector
; /* end sector */
2098 uint8_t end_cyl
; /* end cylinder */
2099 uint32_t start_sect
; /* starting sector counting from 0 */
2100 uint32_t nr_sects
; /* nr of sectors in partition */
2103 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
2104 static int guess_disk_lchs(BlockDriverState
*bs
,
2105 int *pcylinders
, int *pheads
, int *psectors
)
2107 uint8_t buf
[BDRV_SECTOR_SIZE
];
2108 int ret
, i
, heads
, sectors
, cylinders
;
2109 struct partition
*p
;
2111 uint64_t nb_sectors
;
2114 bdrv_get_geometry(bs
, &nb_sectors
);
2117 * The function will be invoked during startup not only in sync I/O mode,
2118 * but also in async I/O mode. So the I/O throttling function has to
2119 * be disabled temporarily here, not permanently.
2121 enabled
= bs
->io_limits_enabled
;
2122 bs
->io_limits_enabled
= false;
2123 ret
= bdrv_read(bs
, 0, buf
, 1);
2124 bs
->io_limits_enabled
= enabled
;
2127 /* test msdos magic */
2128 if (buf
[510] != 0x55 || buf
[511] != 0xaa)
2130 for(i
= 0; i
< 4; i
++) {
2131 p
= ((struct partition
*)(buf
+ 0x1be)) + i
;
2132 nr_sects
= le32_to_cpu(p
->nr_sects
);
2133 if (nr_sects
&& p
->end_head
) {
2134 /* We make the assumption that the partition terminates on
2135 a cylinder boundary */
2136 heads
= p
->end_head
+ 1;
2137 sectors
= p
->end_sector
& 63;
2140 cylinders
= nb_sectors
/ (heads
* sectors
);
2141 if (cylinders
< 1 || cylinders
> 16383)
2144 *psectors
= sectors
;
2145 *pcylinders
= cylinders
;
2147 printf("guessed geometry: LCHS=%d %d %d\n",
2148 cylinders
, heads
, sectors
);
2156 void bdrv_guess_geometry(BlockDriverState
*bs
, int *pcyls
, int *pheads
, int *psecs
)
2158 int translation
, lba_detected
= 0;
2159 int cylinders
, heads
, secs
;
2160 uint64_t nb_sectors
;
2162 /* if a geometry hint is available, use it */
2163 bdrv_get_geometry(bs
, &nb_sectors
);
2164 bdrv_get_geometry_hint(bs
, &cylinders
, &heads
, &secs
);
2165 translation
= bdrv_get_translation_hint(bs
);
2166 if (cylinders
!= 0) {
2171 if (guess_disk_lchs(bs
, &cylinders
, &heads
, &secs
) == 0) {
2173 /* if heads > 16, it means that a BIOS LBA
2174 translation was active, so the default
2175 hardware geometry is OK */
2177 goto default_geometry
;
2182 /* disable any translation to be in sync with
2183 the logical geometry */
2184 if (translation
== BIOS_ATA_TRANSLATION_AUTO
) {
2185 bdrv_set_translation_hint(bs
,
2186 BIOS_ATA_TRANSLATION_NONE
);
2191 /* if no geometry, use a standard physical disk geometry */
2192 cylinders
= nb_sectors
/ (16 * 63);
2194 if (cylinders
> 16383)
2196 else if (cylinders
< 2)
2201 if ((lba_detected
== 1) && (translation
== BIOS_ATA_TRANSLATION_AUTO
)) {
2202 if ((*pcyls
* *pheads
) <= 131072) {
2203 bdrv_set_translation_hint(bs
,
2204 BIOS_ATA_TRANSLATION_LARGE
);
2206 bdrv_set_translation_hint(bs
,
2207 BIOS_ATA_TRANSLATION_LBA
);
2211 bdrv_set_geometry_hint(bs
, *pcyls
, *pheads
, *psecs
);
2215 void bdrv_set_geometry_hint(BlockDriverState
*bs
,
2216 int cyls
, int heads
, int secs
)
2223 void bdrv_set_translation_hint(BlockDriverState
*bs
, int translation
)
2225 bs
->translation
= translation
;
2228 void bdrv_get_geometry_hint(BlockDriverState
*bs
,
2229 int *pcyls
, int *pheads
, int *psecs
)
2232 *pheads
= bs
->heads
;
2236 /* throttling disk io limits */
2237 void bdrv_set_io_limits(BlockDriverState
*bs
,
2238 BlockIOLimit
*io_limits
)
2240 bs
->io_limits
= *io_limits
;
2241 bs
->io_limits_enabled
= bdrv_io_limits_enabled(bs
);
2244 /* Recognize floppy formats */
2245 typedef struct FDFormat
{
2253 static const FDFormat fd_formats
[] = {
2254 /* First entry is default format */
2255 /* 1.44 MB 3"1/2 floppy disks */
2256 { FDRIVE_DRV_144
, 18, 80, 1, FDRIVE_RATE_500K
, },
2257 { FDRIVE_DRV_144
, 20, 80, 1, FDRIVE_RATE_500K
, },
2258 { FDRIVE_DRV_144
, 21, 80, 1, FDRIVE_RATE_500K
, },
2259 { FDRIVE_DRV_144
, 21, 82, 1, FDRIVE_RATE_500K
, },
2260 { FDRIVE_DRV_144
, 21, 83, 1, FDRIVE_RATE_500K
, },
2261 { FDRIVE_DRV_144
, 22, 80, 1, FDRIVE_RATE_500K
, },
2262 { FDRIVE_DRV_144
, 23, 80, 1, FDRIVE_RATE_500K
, },
2263 { FDRIVE_DRV_144
, 24, 80, 1, FDRIVE_RATE_500K
, },
2264 /* 2.88 MB 3"1/2 floppy disks */
2265 { FDRIVE_DRV_288
, 36, 80, 1, FDRIVE_RATE_1M
, },
2266 { FDRIVE_DRV_288
, 39, 80, 1, FDRIVE_RATE_1M
, },
2267 { FDRIVE_DRV_288
, 40, 80, 1, FDRIVE_RATE_1M
, },
2268 { FDRIVE_DRV_288
, 44, 80, 1, FDRIVE_RATE_1M
, },
2269 { FDRIVE_DRV_288
, 48, 80, 1, FDRIVE_RATE_1M
, },
2270 /* 720 kB 3"1/2 floppy disks */
2271 { FDRIVE_DRV_144
, 9, 80, 1, FDRIVE_RATE_250K
, },
2272 { FDRIVE_DRV_144
, 10, 80, 1, FDRIVE_RATE_250K
, },
2273 { FDRIVE_DRV_144
, 10, 82, 1, FDRIVE_RATE_250K
, },
2274 { FDRIVE_DRV_144
, 10, 83, 1, FDRIVE_RATE_250K
, },
2275 { FDRIVE_DRV_144
, 13, 80, 1, FDRIVE_RATE_250K
, },
2276 { FDRIVE_DRV_144
, 14, 80, 1, FDRIVE_RATE_250K
, },
2277 /* 1.2 MB 5"1/4 floppy disks */
2278 { FDRIVE_DRV_120
, 15, 80, 1, FDRIVE_RATE_500K
, },
2279 { FDRIVE_DRV_120
, 18, 80, 1, FDRIVE_RATE_500K
, },
2280 { FDRIVE_DRV_120
, 18, 82, 1, FDRIVE_RATE_500K
, },
2281 { FDRIVE_DRV_120
, 18, 83, 1, FDRIVE_RATE_500K
, },
2282 { FDRIVE_DRV_120
, 20, 80, 1, FDRIVE_RATE_500K
, },
2283 /* 720 kB 5"1/4 floppy disks */
2284 { FDRIVE_DRV_120
, 9, 80, 1, FDRIVE_RATE_250K
, },
2285 { FDRIVE_DRV_120
, 11, 80, 1, FDRIVE_RATE_250K
, },
2286 /* 360 kB 5"1/4 floppy disks */
2287 { FDRIVE_DRV_120
, 9, 40, 1, FDRIVE_RATE_300K
, },
2288 { FDRIVE_DRV_120
, 9, 40, 0, FDRIVE_RATE_300K
, },
2289 { FDRIVE_DRV_120
, 10, 41, 1, FDRIVE_RATE_300K
, },
2290 { FDRIVE_DRV_120
, 10, 42, 1, FDRIVE_RATE_300K
, },
2291 /* 320 kB 5"1/4 floppy disks */
2292 { FDRIVE_DRV_120
, 8, 40, 1, FDRIVE_RATE_250K
, },
2293 { FDRIVE_DRV_120
, 8, 40, 0, FDRIVE_RATE_250K
, },
2294 /* 360 kB must match 5"1/4 better than 3"1/2... */
2295 { FDRIVE_DRV_144
, 9, 80, 0, FDRIVE_RATE_250K
, },
2297 { FDRIVE_DRV_NONE
, -1, -1, 0, 0, },
2300 void bdrv_get_floppy_geometry_hint(BlockDriverState
*bs
, int *nb_heads
,
2301 int *max_track
, int *last_sect
,
2302 FDriveType drive_in
, FDriveType
*drive
,
2305 const FDFormat
*parse
;
2306 uint64_t nb_sectors
, size
;
2307 int i
, first_match
, match
;
2309 bdrv_get_geometry_hint(bs
, nb_heads
, max_track
, last_sect
);
2310 if (*nb_heads
!= 0 && *max_track
!= 0 && *last_sect
!= 0) {
2311 /* User defined disk */
2312 *rate
= FDRIVE_RATE_500K
;
2314 bdrv_get_geometry(bs
, &nb_sectors
);
2317 for (i
= 0; ; i
++) {
2318 parse
= &fd_formats
[i
];
2319 if (parse
->drive
== FDRIVE_DRV_NONE
) {
2322 if (drive_in
== parse
->drive
||
2323 drive_in
== FDRIVE_DRV_NONE
) {
2324 size
= (parse
->max_head
+ 1) * parse
->max_track
*
2326 if (nb_sectors
== size
) {
2330 if (first_match
== -1) {
2336 if (first_match
== -1) {
2339 match
= first_match
;
2341 parse
= &fd_formats
[match
];
2343 *nb_heads
= parse
->max_head
+ 1;
2344 *max_track
= parse
->max_track
;
2345 *last_sect
= parse
->last_sect
;
2346 *drive
= parse
->drive
;
2347 *rate
= parse
->rate
;
2351 int bdrv_get_translation_hint(BlockDriverState
*bs
)
2353 return bs
->translation
;
2356 void bdrv_set_on_error(BlockDriverState
*bs
, BlockErrorAction on_read_error
,
2357 BlockErrorAction on_write_error
)
2359 bs
->on_read_error
= on_read_error
;
2360 bs
->on_write_error
= on_write_error
;
2363 BlockErrorAction
bdrv_get_on_error(BlockDriverState
*bs
, int is_read
)
2365 return is_read
? bs
->on_read_error
: bs
->on_write_error
;
2368 int bdrv_is_read_only(BlockDriverState
*bs
)
2370 return bs
->read_only
;
2373 int bdrv_is_sg(BlockDriverState
*bs
)
2378 int bdrv_enable_write_cache(BlockDriverState
*bs
)
2380 return bs
->enable_write_cache
;
2383 int bdrv_is_encrypted(BlockDriverState
*bs
)
2385 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
2387 return bs
->encrypted
;
2390 int bdrv_key_required(BlockDriverState
*bs
)
2392 BlockDriverState
*backing_hd
= bs
->backing_hd
;
2394 if (backing_hd
&& backing_hd
->encrypted
&& !backing_hd
->valid_key
)
2396 return (bs
->encrypted
&& !bs
->valid_key
);
2399 int bdrv_set_key(BlockDriverState
*bs
, const char *key
)
2402 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
) {
2403 ret
= bdrv_set_key(bs
->backing_hd
, key
);
2409 if (!bs
->encrypted
) {
2411 } else if (!bs
->drv
|| !bs
->drv
->bdrv_set_key
) {
2414 ret
= bs
->drv
->bdrv_set_key(bs
, key
);
2417 } else if (!bs
->valid_key
) {
2419 /* call the change callback now, we skipped it on open */
2420 bdrv_dev_change_media_cb(bs
, true);
2425 void bdrv_get_format(BlockDriverState
*bs
, char *buf
, int buf_size
)
2430 pstrcpy(buf
, buf_size
, bs
->drv
->format_name
);
2434 void bdrv_iterate_format(void (*it
)(void *opaque
, const char *name
),
2439 QLIST_FOREACH(drv
, &bdrv_drivers
, list
) {
2440 it(opaque
, drv
->format_name
);
2444 BlockDriverState
*bdrv_find(const char *name
)
2446 BlockDriverState
*bs
;
2448 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2449 if (!strcmp(name
, bs
->device_name
)) {
2456 BlockDriverState
*bdrv_next(BlockDriverState
*bs
)
2459 return QTAILQ_FIRST(&bdrv_states
);
2461 return QTAILQ_NEXT(bs
, list
);
2464 void bdrv_iterate(void (*it
)(void *opaque
, BlockDriverState
*bs
), void *opaque
)
2466 BlockDriverState
*bs
;
2468 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2473 const char *bdrv_get_device_name(BlockDriverState
*bs
)
2475 return bs
->device_name
;
2478 int bdrv_get_flags(BlockDriverState
*bs
)
2480 return bs
->open_flags
;
2483 void bdrv_flush_all(void)
2485 BlockDriverState
*bs
;
2487 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2492 int bdrv_has_zero_init(BlockDriverState
*bs
)
2496 if (bs
->drv
->bdrv_has_zero_init
) {
2497 return bs
->drv
->bdrv_has_zero_init(bs
);
2503 typedef struct BdrvCoIsAllocatedData
{
2504 BlockDriverState
*bs
;
2510 } BdrvCoIsAllocatedData
;
2513 * Returns true iff the specified sector is present in the disk image. Drivers
2514 * not implementing the functionality are assumed to not support backing files,
2515 * hence all their sectors are reported as allocated.
2517 * If 'sector_num' is beyond the end of the disk image the return value is 0
2518 * and 'pnum' is set to 0.
2520 * 'pnum' is set to the number of sectors (including and immediately following
2521 * the specified sector) that are known to be in the same
2522 * allocated/unallocated state.
2524 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
2525 * beyond the end of the disk image it will be clamped.
2527 int coroutine_fn
bdrv_co_is_allocated(BlockDriverState
*bs
, int64_t sector_num
,
2528 int nb_sectors
, int *pnum
)
2532 if (sector_num
>= bs
->total_sectors
) {
2537 n
= bs
->total_sectors
- sector_num
;
2538 if (n
< nb_sectors
) {
2542 if (!bs
->drv
->bdrv_co_is_allocated
) {
2547 return bs
->drv
->bdrv_co_is_allocated(bs
, sector_num
, nb_sectors
, pnum
);
2550 /* Coroutine wrapper for bdrv_is_allocated() */
2551 static void coroutine_fn
bdrv_is_allocated_co_entry(void *opaque
)
2553 BdrvCoIsAllocatedData
*data
= opaque
;
2554 BlockDriverState
*bs
= data
->bs
;
2556 data
->ret
= bdrv_co_is_allocated(bs
, data
->sector_num
, data
->nb_sectors
,
2562 * Synchronous wrapper around bdrv_co_is_allocated().
2564 * See bdrv_co_is_allocated() for details.
2566 int bdrv_is_allocated(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
,
2570 BdrvCoIsAllocatedData data
= {
2572 .sector_num
= sector_num
,
2573 .nb_sectors
= nb_sectors
,
2578 co
= qemu_coroutine_create(bdrv_is_allocated_co_entry
);
2579 qemu_coroutine_enter(co
, &data
);
2580 while (!data
.done
) {
2587 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
2589 * Return true if the given sector is allocated in any image between
2590 * BASE and TOP (inclusive). BASE can be NULL to check if the given
2591 * sector is allocated in any image of the chain. Return false otherwise.
2593 * 'pnum' is set to the number of sectors (including and immediately following
2594 * the specified sector) that are known to be in the same
2595 * allocated/unallocated state.
2598 int coroutine_fn
bdrv_co_is_allocated_above(BlockDriverState
*top
,
2599 BlockDriverState
*base
,
2601 int nb_sectors
, int *pnum
)
2603 BlockDriverState
*intermediate
;
2604 int ret
, n
= nb_sectors
;
2607 while (intermediate
&& intermediate
!= base
) {
2609 ret
= bdrv_co_is_allocated(intermediate
, sector_num
, nb_sectors
,
2619 * [sector_num, nb_sectors] is unallocated on top but intermediate
2622 * [sector_num+x, nr_sectors] allocated.
2624 if (n
> pnum_inter
) {
2628 intermediate
= intermediate
->backing_hd
;
2635 BlockInfoList
*qmp_query_block(Error
**errp
)
2637 BlockInfoList
*head
= NULL
, *cur_item
= NULL
;
2638 BlockDriverState
*bs
;
2640 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2641 BlockInfoList
*info
= g_malloc0(sizeof(*info
));
2643 info
->value
= g_malloc0(sizeof(*info
->value
));
2644 info
->value
->device
= g_strdup(bs
->device_name
);
2645 info
->value
->type
= g_strdup("unknown");
2646 info
->value
->locked
= bdrv_dev_is_medium_locked(bs
);
2647 info
->value
->removable
= bdrv_dev_has_removable_media(bs
);
2649 if (bdrv_dev_has_removable_media(bs
)) {
2650 info
->value
->has_tray_open
= true;
2651 info
->value
->tray_open
= bdrv_dev_is_tray_open(bs
);
2654 if (bdrv_iostatus_is_enabled(bs
)) {
2655 info
->value
->has_io_status
= true;
2656 info
->value
->io_status
= bs
->iostatus
;
2660 info
->value
->has_inserted
= true;
2661 info
->value
->inserted
= g_malloc0(sizeof(*info
->value
->inserted
));
2662 info
->value
->inserted
->file
= g_strdup(bs
->filename
);
2663 info
->value
->inserted
->ro
= bs
->read_only
;
2664 info
->value
->inserted
->drv
= g_strdup(bs
->drv
->format_name
);
2665 info
->value
->inserted
->encrypted
= bs
->encrypted
;
2666 if (bs
->backing_file
[0]) {
2667 info
->value
->inserted
->has_backing_file
= true;
2668 info
->value
->inserted
->backing_file
= g_strdup(bs
->backing_file
);
2671 if (bs
->io_limits_enabled
) {
2672 info
->value
->inserted
->bps
=
2673 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
];
2674 info
->value
->inserted
->bps_rd
=
2675 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_READ
];
2676 info
->value
->inserted
->bps_wr
=
2677 bs
->io_limits
.bps
[BLOCK_IO_LIMIT_WRITE
];
2678 info
->value
->inserted
->iops
=
2679 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
];
2680 info
->value
->inserted
->iops_rd
=
2681 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_READ
];
2682 info
->value
->inserted
->iops_wr
=
2683 bs
->io_limits
.iops
[BLOCK_IO_LIMIT_WRITE
];
2687 /* XXX: waiting for the qapi to support GSList */
2689 head
= cur_item
= info
;
2691 cur_item
->next
= info
;
2699 /* Consider exposing this as a full fledged QMP command */
2700 static BlockStats
*qmp_query_blockstat(const BlockDriverState
*bs
, Error
**errp
)
2704 s
= g_malloc0(sizeof(*s
));
2706 if (bs
->device_name
[0]) {
2707 s
->has_device
= true;
2708 s
->device
= g_strdup(bs
->device_name
);
2711 s
->stats
= g_malloc0(sizeof(*s
->stats
));
2712 s
->stats
->rd_bytes
= bs
->nr_bytes
[BDRV_ACCT_READ
];
2713 s
->stats
->wr_bytes
= bs
->nr_bytes
[BDRV_ACCT_WRITE
];
2714 s
->stats
->rd_operations
= bs
->nr_ops
[BDRV_ACCT_READ
];
2715 s
->stats
->wr_operations
= bs
->nr_ops
[BDRV_ACCT_WRITE
];
2716 s
->stats
->wr_highest_offset
= bs
->wr_highest_sector
* BDRV_SECTOR_SIZE
;
2717 s
->stats
->flush_operations
= bs
->nr_ops
[BDRV_ACCT_FLUSH
];
2718 s
->stats
->wr_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_WRITE
];
2719 s
->stats
->rd_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_READ
];
2720 s
->stats
->flush_total_time_ns
= bs
->total_time_ns
[BDRV_ACCT_FLUSH
];
2723 s
->has_parent
= true;
2724 s
->parent
= qmp_query_blockstat(bs
->file
, NULL
);
2730 BlockStatsList
*qmp_query_blockstats(Error
**errp
)
2732 BlockStatsList
*head
= NULL
, *cur_item
= NULL
;
2733 BlockDriverState
*bs
;
2735 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
2736 BlockStatsList
*info
= g_malloc0(sizeof(*info
));
2737 info
->value
= qmp_query_blockstat(bs
, NULL
);
2739 /* XXX: waiting for the qapi to support GSList */
2741 head
= cur_item
= info
;
2743 cur_item
->next
= info
;
2751 const char *bdrv_get_encrypted_filename(BlockDriverState
*bs
)
2753 if (bs
->backing_hd
&& bs
->backing_hd
->encrypted
)
2754 return bs
->backing_file
;
2755 else if (bs
->encrypted
)
2756 return bs
->filename
;
2761 void bdrv_get_backing_filename(BlockDriverState
*bs
,
2762 char *filename
, int filename_size
)
2764 pstrcpy(filename
, filename_size
, bs
->backing_file
);
2767 int bdrv_write_compressed(BlockDriverState
*bs
, int64_t sector_num
,
2768 const uint8_t *buf
, int nb_sectors
)
2770 BlockDriver
*drv
= bs
->drv
;
2773 if (!drv
->bdrv_write_compressed
)
2775 if (bdrv_check_request(bs
, sector_num
, nb_sectors
))
2778 if (bs
->dirty_bitmap
) {
2779 set_dirty_bitmap(bs
, sector_num
, nb_sectors
, 1);
2782 return drv
->bdrv_write_compressed(bs
, sector_num
, buf
, nb_sectors
);
2785 int bdrv_get_info(BlockDriverState
*bs
, BlockDriverInfo
*bdi
)
2787 BlockDriver
*drv
= bs
->drv
;
2790 if (!drv
->bdrv_get_info
)
2792 memset(bdi
, 0, sizeof(*bdi
));
2793 return drv
->bdrv_get_info(bs
, bdi
);
2796 int bdrv_save_vmstate(BlockDriverState
*bs
, const uint8_t *buf
,
2797 int64_t pos
, int size
)
2799 BlockDriver
*drv
= bs
->drv
;
2802 if (drv
->bdrv_save_vmstate
)
2803 return drv
->bdrv_save_vmstate(bs
, buf
, pos
, size
);
2805 return bdrv_save_vmstate(bs
->file
, buf
, pos
, size
);
2809 int bdrv_load_vmstate(BlockDriverState
*bs
, uint8_t *buf
,
2810 int64_t pos
, int size
)
2812 BlockDriver
*drv
= bs
->drv
;
2815 if (drv
->bdrv_load_vmstate
)
2816 return drv
->bdrv_load_vmstate(bs
, buf
, pos
, size
);
2818 return bdrv_load_vmstate(bs
->file
, buf
, pos
, size
);
2822 void bdrv_debug_event(BlockDriverState
*bs
, BlkDebugEvent event
)
2824 BlockDriver
*drv
= bs
->drv
;
2826 if (!drv
|| !drv
->bdrv_debug_event
) {
2830 return drv
->bdrv_debug_event(bs
, event
);
2834 /**************************************************************/
2835 /* handling of snapshots */
2837 int bdrv_can_snapshot(BlockDriverState
*bs
)
2839 BlockDriver
*drv
= bs
->drv
;
2840 if (!drv
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
2844 if (!drv
->bdrv_snapshot_create
) {
2845 if (bs
->file
!= NULL
) {
2846 return bdrv_can_snapshot(bs
->file
);
/* Return non-zero iff this device was opened with BDRV_O_SNAPSHOT
 * (i.e. it is backed by a temporary snapshot overlay). */
2854 int bdrv_is_snapshot(BlockDriverState
*bs
)
2856 return !!(bs
->open_flags
& BDRV_O_SNAPSHOT
);
2859 BlockDriverState
*bdrv_snapshots(void)
2861 BlockDriverState
*bs
;
2864 return bs_snapshots
;
2868 while ((bs
= bdrv_next(bs
))) {
2869 if (bdrv_can_snapshot(bs
)) {
2877 int bdrv_snapshot_create(BlockDriverState
*bs
,
2878 QEMUSnapshotInfo
*sn_info
)
2880 BlockDriver
*drv
= bs
->drv
;
2883 if (drv
->bdrv_snapshot_create
)
2884 return drv
->bdrv_snapshot_create(bs
, sn_info
);
2886 return bdrv_snapshot_create(bs
->file
, sn_info
);
2890 int bdrv_snapshot_goto(BlockDriverState
*bs
,
2891 const char *snapshot_id
)
2893 BlockDriver
*drv
= bs
->drv
;
2898 if (drv
->bdrv_snapshot_goto
)
2899 return drv
->bdrv_snapshot_goto(bs
, snapshot_id
);
2902 drv
->bdrv_close(bs
);
2903 ret
= bdrv_snapshot_goto(bs
->file
, snapshot_id
);
2904 open_ret
= drv
->bdrv_open(bs
, bs
->open_flags
);
2906 bdrv_delete(bs
->file
);
2916 int bdrv_snapshot_delete(BlockDriverState
*bs
, const char *snapshot_id
)
2918 BlockDriver
*drv
= bs
->drv
;
2921 if (drv
->bdrv_snapshot_delete
)
2922 return drv
->bdrv_snapshot_delete(bs
, snapshot_id
);
2924 return bdrv_snapshot_delete(bs
->file
, snapshot_id
);
2928 int bdrv_snapshot_list(BlockDriverState
*bs
,
2929 QEMUSnapshotInfo
**psn_info
)
2931 BlockDriver
*drv
= bs
->drv
;
2934 if (drv
->bdrv_snapshot_list
)
2935 return drv
->bdrv_snapshot_list(bs
, psn_info
);
2937 return bdrv_snapshot_list(bs
->file
, psn_info
);
2941 int bdrv_snapshot_load_tmp(BlockDriverState
*bs
,
2942 const char *snapshot_name
)
2944 BlockDriver
*drv
= bs
->drv
;
2948 if (!bs
->read_only
) {
2951 if (drv
->bdrv_snapshot_load_tmp
) {
2952 return drv
->bdrv_snapshot_load_tmp(bs
, snapshot_name
);
2957 BlockDriverState
*bdrv_find_backing_image(BlockDriverState
*bs
,
2958 const char *backing_file
)
2964 if (bs
->backing_hd
) {
2965 if (strcmp(bs
->backing_file
, backing_file
) == 0) {
2966 return bs
->backing_hd
;
2968 return bdrv_find_backing_image(bs
->backing_hd
, backing_file
);
2975 #define NB_SUFFIXES 4
2977 char *get_human_readable_size(char *buf
, int buf_size
, int64_t size
)
2979 static const char suffixes
[NB_SUFFIXES
] = "KMGT";
2984 snprintf(buf
, buf_size
, "%" PRId64
, size
);
2987 for(i
= 0; i
< NB_SUFFIXES
; i
++) {
2988 if (size
< (10 * base
)) {
2989 snprintf(buf
, buf_size
, "%0.1f%c",
2990 (double)size
/ base
,
2993 } else if (size
< (1000 * base
) || i
== (NB_SUFFIXES
- 1)) {
2994 snprintf(buf
, buf_size
, "%" PRId64
"%c",
2995 ((size
+ (base
>> 1)) / base
),
3005 char *bdrv_snapshot_dump(char *buf
, int buf_size
, QEMUSnapshotInfo
*sn
)
3007 char buf1
[128], date_buf
[128], clock_buf
[128];
3017 snprintf(buf
, buf_size
,
3018 "%-10s%-20s%7s%20s%15s",
3019 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
3023 ptm
= localtime(&ti
);
3024 strftime(date_buf
, sizeof(date_buf
),
3025 "%Y-%m-%d %H:%M:%S", ptm
);
3027 localtime_r(&ti
, &tm
);
3028 strftime(date_buf
, sizeof(date_buf
),
3029 "%Y-%m-%d %H:%M:%S", &tm
);
3031 secs
= sn
->vm_clock_nsec
/ 1000000000;
3032 snprintf(clock_buf
, sizeof(clock_buf
),
3033 "%02d:%02d:%02d.%03d",
3035 (int)((secs
/ 60) % 60),
3037 (int)((sn
->vm_clock_nsec
/ 1000000) % 1000));
3038 snprintf(buf
, buf_size
,
3039 "%-10s%-20s%7s%20s%15s",
3040 sn
->id_str
, sn
->name
,
3041 get_human_readable_size(buf1
, sizeof(buf1
), sn
->vm_state_size
),
3048 /**************************************************************/
3051 BlockDriverAIOCB
*bdrv_aio_readv(BlockDriverState
*bs
, int64_t sector_num
,
3052 QEMUIOVector
*qiov
, int nb_sectors
,
3053 BlockDriverCompletionFunc
*cb
, void *opaque
)
3055 trace_bdrv_aio_readv(bs
, sector_num
, nb_sectors
, opaque
);
3057 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
3061 BlockDriverAIOCB
*bdrv_aio_writev(BlockDriverState
*bs
, int64_t sector_num
,
3062 QEMUIOVector
*qiov
, int nb_sectors
,
3063 BlockDriverCompletionFunc
*cb
, void *opaque
)
3065 trace_bdrv_aio_writev(bs
, sector_num
, nb_sectors
, opaque
);
3067 return bdrv_co_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
,
3072 typedef struct MultiwriteCB
{
3077 BlockDriverCompletionFunc
*cb
;
3079 QEMUIOVector
*free_qiov
;
3083 static void multiwrite_user_cb(MultiwriteCB
*mcb
)
3087 for (i
= 0; i
< mcb
->num_callbacks
; i
++) {
3088 mcb
->callbacks
[i
].cb(mcb
->callbacks
[i
].opaque
, mcb
->error
);
3089 if (mcb
->callbacks
[i
].free_qiov
) {
3090 qemu_iovec_destroy(mcb
->callbacks
[i
].free_qiov
);
3092 g_free(mcb
->callbacks
[i
].free_qiov
);
3096 static void multiwrite_cb(void *opaque
, int ret
)
3098 MultiwriteCB
*mcb
= opaque
;
3100 trace_multiwrite_cb(mcb
, ret
);
3102 if (ret
< 0 && !mcb
->error
) {
3106 mcb
->num_requests
--;
3107 if (mcb
->num_requests
== 0) {
3108 multiwrite_user_cb(mcb
);
3113 static int multiwrite_req_compare(const void *a
, const void *b
)
3115 const BlockRequest
*req1
= a
, *req2
= b
;
3118 * Note that we can't simply subtract req2->sector from req1->sector
3119 * here as that could overflow the return value.
3121 if (req1
->sector
> req2
->sector
) {
3123 } else if (req1
->sector
< req2
->sector
) {
3131 * Takes a bunch of requests and tries to merge them. Returns the number of
3132 * requests that remain after merging.
3134 static int multiwrite_merge(BlockDriverState
*bs
, BlockRequest
*reqs
,
3135 int num_reqs
, MultiwriteCB
*mcb
)
3139 // Sort requests by start sector
3140 qsort(reqs
, num_reqs
, sizeof(*reqs
), &multiwrite_req_compare
);
3142 // Check if adjacent requests touch the same clusters. If so, combine them,
3143 // filling up gaps with zero sectors.
3145 for (i
= 1; i
< num_reqs
; i
++) {
3147 int64_t oldreq_last
= reqs
[outidx
].sector
+ reqs
[outidx
].nb_sectors
;
3149 // Handle exactly sequential writes and overlapping writes.
3150 if (reqs
[i
].sector
<= oldreq_last
) {
3154 if (reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1 > IOV_MAX
) {
3160 QEMUIOVector
*qiov
= g_malloc0(sizeof(*qiov
));
3161 qemu_iovec_init(qiov
,
3162 reqs
[outidx
].qiov
->niov
+ reqs
[i
].qiov
->niov
+ 1);
3164 // Add the first request to the merged one. If the requests are
3165 // overlapping, drop the last sectors of the first request.
3166 size
= (reqs
[i
].sector
- reqs
[outidx
].sector
) << 9;
3167 qemu_iovec_concat(qiov
, reqs
[outidx
].qiov
, size
);
3169 // We should not need to add any zeros between the two requests
3170 assert (reqs
[i
].sector
<= oldreq_last
);
3172 // Add the second request
3173 qemu_iovec_concat(qiov
, reqs
[i
].qiov
, reqs
[i
].qiov
->size
);
3175 reqs
[outidx
].nb_sectors
= qiov
->size
>> 9;
3176 reqs
[outidx
].qiov
= qiov
;
3178 mcb
->callbacks
[i
].free_qiov
= reqs
[outidx
].qiov
;
3181 reqs
[outidx
].sector
= reqs
[i
].sector
;
3182 reqs
[outidx
].nb_sectors
= reqs
[i
].nb_sectors
;
3183 reqs
[outidx
].qiov
= reqs
[i
].qiov
;
3191 * Submit multiple AIO write requests at once.
3193 * On success, the function returns 0 and all requests in the reqs array have
3194 * been submitted. In error case this function returns -1, and any of the
3195 * requests may or may not be submitted yet. In particular, this means that the
3196 * callback will be called for some of the requests, for others it won't. The
3197 * caller must check the error field of the BlockRequest to wait for the right
3198 * callbacks (if error != 0, no callback will be called).
3200 * The implementation may modify the contents of the reqs array, e.g. to merge
3201 * requests. However, the fields opaque and error are left unmodified as they
3202 * are used to signal failure for a single request to the caller.
3204 int bdrv_aio_multiwrite(BlockDriverState
*bs
, BlockRequest
*reqs
, int num_reqs
)
3209 /* don't submit writes if we don't have a medium */
3210 if (bs
->drv
== NULL
) {
3211 for (i
= 0; i
< num_reqs
; i
++) {
3212 reqs
[i
].error
= -ENOMEDIUM
;
3217 if (num_reqs
== 0) {
3221 // Create MultiwriteCB structure
3222 mcb
= g_malloc0(sizeof(*mcb
) + num_reqs
* sizeof(*mcb
->callbacks
));
3223 mcb
->num_requests
= 0;
3224 mcb
->num_callbacks
= num_reqs
;
3226 for (i
= 0; i
< num_reqs
; i
++) {
3227 mcb
->callbacks
[i
].cb
= reqs
[i
].cb
;
3228 mcb
->callbacks
[i
].opaque
= reqs
[i
].opaque
;
3231 // Check for mergeable requests
3232 num_reqs
= multiwrite_merge(bs
, reqs
, num_reqs
, mcb
);
3234 trace_bdrv_aio_multiwrite(mcb
, mcb
->num_callbacks
, num_reqs
);
3236 /* Run the aio requests. */
3237 mcb
->num_requests
= num_reqs
;
3238 for (i
= 0; i
< num_reqs
; i
++) {
3239 bdrv_aio_writev(bs
, reqs
[i
].sector
, reqs
[i
].qiov
,
3240 reqs
[i
].nb_sectors
, multiwrite_cb
, mcb
);
/* Cancel an in-flight AIO request by delegating to the cancel hook of
 * the pool the AIOCB was allocated from. */
3246 void bdrv_aio_cancel(BlockDriverAIOCB
*acb
)
3248 acb
->pool
->cancel(acb
);
3251 /* block I/O throttling */
3252 static bool bdrv_exceed_bps_limits(BlockDriverState
*bs
, int nb_sectors
,
3253 bool is_write
, double elapsed_time
, uint64_t *wait
)
3255 uint64_t bps_limit
= 0;
3256 double bytes_limit
, bytes_base
, bytes_res
;
3257 double slice_time
, wait_time
;
3259 if (bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
]) {
3260 bps_limit
= bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
];
3261 } else if (bs
->io_limits
.bps
[is_write
]) {
3262 bps_limit
= bs
->io_limits
.bps
[is_write
];
3271 slice_time
= bs
->slice_end
- bs
->slice_start
;
3272 slice_time
/= (NANOSECONDS_PER_SECOND
);
3273 bytes_limit
= bps_limit
* slice_time
;
3274 bytes_base
= bs
->nr_bytes
[is_write
] - bs
->io_base
.bytes
[is_write
];
3275 if (bs
->io_limits
.bps
[BLOCK_IO_LIMIT_TOTAL
]) {
3276 bytes_base
+= bs
->nr_bytes
[!is_write
] - bs
->io_base
.bytes
[!is_write
];
3279 /* bytes_base: the bytes of data which have been read/written; and
3280 * it is obtained from the history statistic info.
3281 * bytes_res: the remaining bytes of data which need to be read/written.
3282 * (bytes_base + bytes_res) / bps_limit: used to calculate
3283 * the total time for completing reading/writing all data.
3285 bytes_res
= (unsigned) nb_sectors
* BDRV_SECTOR_SIZE
;
3287 if (bytes_base
+ bytes_res
<= bytes_limit
) {
3295 /* Calc approx time to dispatch */
3296 wait_time
= (bytes_base
+ bytes_res
) / bps_limit
- elapsed_time
;
3298 /* When the I/O rate at runtime exceeds the limits,
3299 * bs->slice_end need to be extended in order that the current statistic
3300 * info can be kept until the timer fire, so it is increased and tuned
3301 * based on the result of experiment.
3303 bs
->slice_time
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3304 bs
->slice_end
+= bs
->slice_time
- 3 * BLOCK_IO_SLICE_TIME
;
3306 *wait
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3312 static bool bdrv_exceed_iops_limits(BlockDriverState
*bs
, bool is_write
,
3313 double elapsed_time
, uint64_t *wait
)
3315 uint64_t iops_limit
= 0;
3316 double ios_limit
, ios_base
;
3317 double slice_time
, wait_time
;
3319 if (bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
]) {
3320 iops_limit
= bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
];
3321 } else if (bs
->io_limits
.iops
[is_write
]) {
3322 iops_limit
= bs
->io_limits
.iops
[is_write
];
3331 slice_time
= bs
->slice_end
- bs
->slice_start
;
3332 slice_time
/= (NANOSECONDS_PER_SECOND
);
3333 ios_limit
= iops_limit
* slice_time
;
3334 ios_base
= bs
->nr_ops
[is_write
] - bs
->io_base
.ios
[is_write
];
3335 if (bs
->io_limits
.iops
[BLOCK_IO_LIMIT_TOTAL
]) {
3336 ios_base
+= bs
->nr_ops
[!is_write
] - bs
->io_base
.ios
[!is_write
];
3339 if (ios_base
+ 1 <= ios_limit
) {
3347 /* Calc approx time to dispatch */
3348 wait_time
= (ios_base
+ 1) / iops_limit
;
3349 if (wait_time
> elapsed_time
) {
3350 wait_time
= wait_time
- elapsed_time
;
3355 bs
->slice_time
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3356 bs
->slice_end
+= bs
->slice_time
- 3 * BLOCK_IO_SLICE_TIME
;
3358 *wait
= wait_time
* BLOCK_IO_SLICE_TIME
* 10;
3364 static bool bdrv_exceed_io_limits(BlockDriverState
*bs
, int nb_sectors
,
3365 bool is_write
, int64_t *wait
)
3367 int64_t now
, max_wait
;
3368 uint64_t bps_wait
= 0, iops_wait
= 0;
3369 double elapsed_time
;
3370 int bps_ret
, iops_ret
;
3372 now
= qemu_get_clock_ns(vm_clock
);
3373 if ((bs
->slice_start
< now
)
3374 && (bs
->slice_end
> now
)) {
3375 bs
->slice_end
= now
+ bs
->slice_time
;
3377 bs
->slice_time
= 5 * BLOCK_IO_SLICE_TIME
;
3378 bs
->slice_start
= now
;
3379 bs
->slice_end
= now
+ bs
->slice_time
;
3381 bs
->io_base
.bytes
[is_write
] = bs
->nr_bytes
[is_write
];
3382 bs
->io_base
.bytes
[!is_write
] = bs
->nr_bytes
[!is_write
];
3384 bs
->io_base
.ios
[is_write
] = bs
->nr_ops
[is_write
];
3385 bs
->io_base
.ios
[!is_write
] = bs
->nr_ops
[!is_write
];
3388 elapsed_time
= now
- bs
->slice_start
;
3389 elapsed_time
/= (NANOSECONDS_PER_SECOND
);
3391 bps_ret
= bdrv_exceed_bps_limits(bs
, nb_sectors
,
3392 is_write
, elapsed_time
, &bps_wait
);
3393 iops_ret
= bdrv_exceed_iops_limits(bs
, is_write
,
3394 elapsed_time
, &iops_wait
);
3395 if (bps_ret
|| iops_ret
) {
3396 max_wait
= bps_wait
> iops_wait
? bps_wait
: iops_wait
;
3401 now
= qemu_get_clock_ns(vm_clock
);
3402 if (bs
->slice_end
< now
+ max_wait
) {
3403 bs
->slice_end
= now
+ max_wait
;
3416 /**************************************************************/
3417 /* async block device emulation */
3419 typedef struct BlockDriverAIOCBSync
{
3420 BlockDriverAIOCB common
;
3423 /* vector translation state */
3427 } BlockDriverAIOCBSync
;
3429 static void bdrv_aio_cancel_em(BlockDriverAIOCB
*blockacb
)
3431 BlockDriverAIOCBSync
*acb
=
3432 container_of(blockacb
, BlockDriverAIOCBSync
, common
);
3433 qemu_bh_delete(acb
->bh
);
3435 qemu_aio_release(acb
);
3438 static AIOPool bdrv_em_aio_pool
= {
3439 .aiocb_size
= sizeof(BlockDriverAIOCBSync
),
3440 .cancel
= bdrv_aio_cancel_em
,
3443 static void bdrv_aio_bh_cb(void *opaque
)
3445 BlockDriverAIOCBSync
*acb
= opaque
;
3448 qemu_iovec_from_buffer(acb
->qiov
, acb
->bounce
, acb
->qiov
->size
);
3449 qemu_vfree(acb
->bounce
);
3450 acb
->common
.cb(acb
->common
.opaque
, acb
->ret
);
3451 qemu_bh_delete(acb
->bh
);
3453 qemu_aio_release(acb
);
3456 static BlockDriverAIOCB
*bdrv_aio_rw_vector(BlockDriverState
*bs
,
3460 BlockDriverCompletionFunc
*cb
,
3465 BlockDriverAIOCBSync
*acb
;
3467 acb
= qemu_aio_get(&bdrv_em_aio_pool
, bs
, cb
, opaque
);
3468 acb
->is_write
= is_write
;
3470 acb
->bounce
= qemu_blockalign(bs
, qiov
->size
);
3471 acb
->bh
= qemu_bh_new(bdrv_aio_bh_cb
, acb
);
3474 qemu_iovec_to_buffer(acb
->qiov
, acb
->bounce
);
3475 acb
->ret
= bs
->drv
->bdrv_write(bs
, sector_num
, acb
->bounce
, nb_sectors
);
3477 acb
->ret
= bs
->drv
->bdrv_read(bs
, sector_num
, acb
->bounce
, nb_sectors
);
3480 qemu_bh_schedule(acb
->bh
);
3482 return &acb
->common
;
3485 static BlockDriverAIOCB
*bdrv_aio_readv_em(BlockDriverState
*bs
,
3486 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
3487 BlockDriverCompletionFunc
*cb
, void *opaque
)
3489 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 0);
3492 static BlockDriverAIOCB
*bdrv_aio_writev_em(BlockDriverState
*bs
,
3493 int64_t sector_num
, QEMUIOVector
*qiov
, int nb_sectors
,
3494 BlockDriverCompletionFunc
*cb
, void *opaque
)
3496 return bdrv_aio_rw_vector(bs
, sector_num
, qiov
, nb_sectors
, cb
, opaque
, 1);
3500 typedef struct BlockDriverAIOCBCoroutine
{
3501 BlockDriverAIOCB common
;
3505 } BlockDriverAIOCBCoroutine
;
3507 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB
*blockacb
)
3512 static AIOPool bdrv_em_co_aio_pool
= {
3513 .aiocb_size
= sizeof(BlockDriverAIOCBCoroutine
),
3514 .cancel
= bdrv_aio_co_cancel_em
,
3517 static void bdrv_co_em_bh(void *opaque
)
3519 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3521 acb
->common
.cb(acb
->common
.opaque
, acb
->req
.error
);
3522 qemu_bh_delete(acb
->bh
);
3523 qemu_aio_release(acb
);
3526 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
3527 static void coroutine_fn
bdrv_co_do_rw(void *opaque
)
3529 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3530 BlockDriverState
*bs
= acb
->common
.bs
;
3532 if (!acb
->is_write
) {
3533 acb
->req
.error
= bdrv_co_do_readv(bs
, acb
->req
.sector
,
3534 acb
->req
.nb_sectors
, acb
->req
.qiov
, 0);
3536 acb
->req
.error
= bdrv_co_do_writev(bs
, acb
->req
.sector
,
3537 acb
->req
.nb_sectors
, acb
->req
.qiov
, 0);
3540 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3541 qemu_bh_schedule(acb
->bh
);
3544 static BlockDriverAIOCB
*bdrv_co_aio_rw_vector(BlockDriverState
*bs
,
3548 BlockDriverCompletionFunc
*cb
,
3553 BlockDriverAIOCBCoroutine
*acb
;
3555 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3556 acb
->req
.sector
= sector_num
;
3557 acb
->req
.nb_sectors
= nb_sectors
;
3558 acb
->req
.qiov
= qiov
;
3559 acb
->is_write
= is_write
;
3561 co
= qemu_coroutine_create(bdrv_co_do_rw
);
3562 qemu_coroutine_enter(co
, acb
);
3564 return &acb
->common
;
3567 static void coroutine_fn
bdrv_aio_flush_co_entry(void *opaque
)
3569 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3570 BlockDriverState
*bs
= acb
->common
.bs
;
3572 acb
->req
.error
= bdrv_co_flush(bs
);
3573 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3574 qemu_bh_schedule(acb
->bh
);
3577 BlockDriverAIOCB
*bdrv_aio_flush(BlockDriverState
*bs
,
3578 BlockDriverCompletionFunc
*cb
, void *opaque
)
3580 trace_bdrv_aio_flush(bs
, opaque
);
3583 BlockDriverAIOCBCoroutine
*acb
;
3585 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3586 co
= qemu_coroutine_create(bdrv_aio_flush_co_entry
);
3587 qemu_coroutine_enter(co
, acb
);
3589 return &acb
->common
;
3592 static void coroutine_fn
bdrv_aio_discard_co_entry(void *opaque
)
3594 BlockDriverAIOCBCoroutine
*acb
= opaque
;
3595 BlockDriverState
*bs
= acb
->common
.bs
;
3597 acb
->req
.error
= bdrv_co_discard(bs
, acb
->req
.sector
, acb
->req
.nb_sectors
);
3598 acb
->bh
= qemu_bh_new(bdrv_co_em_bh
, acb
);
3599 qemu_bh_schedule(acb
->bh
);
3602 BlockDriverAIOCB
*bdrv_aio_discard(BlockDriverState
*bs
,
3603 int64_t sector_num
, int nb_sectors
,
3604 BlockDriverCompletionFunc
*cb
, void *opaque
)
3607 BlockDriverAIOCBCoroutine
*acb
;
3609 trace_bdrv_aio_discard(bs
, sector_num
, nb_sectors
, opaque
);
3611 acb
= qemu_aio_get(&bdrv_em_co_aio_pool
, bs
, cb
, opaque
);
3612 acb
->req
.sector
= sector_num
;
3613 acb
->req
.nb_sectors
= nb_sectors
;
3614 co
= qemu_coroutine_create(bdrv_aio_discard_co_entry
);
3615 qemu_coroutine_enter(co
, acb
);
3617 return &acb
->common
;
/* Register all built-in block drivers (runs the MODULE_INIT_BLOCK hooks). */
3620 void bdrv_init(void)
3622 module_call_init(MODULE_INIT_BLOCK
);
/* Initialize block drivers while restricting usable formats to the
 * configured driver whitelist. */
3625 void bdrv_init_with_whitelist(void)
3627 use_bdrv_whitelist
= 1;
3631 void *qemu_aio_get(AIOPool
*pool
, BlockDriverState
*bs
,
3632 BlockDriverCompletionFunc
*cb
, void *opaque
)
3634 BlockDriverAIOCB
*acb
;
3636 if (pool
->free_aiocb
) {
3637 acb
= pool
->free_aiocb
;
3638 pool
->free_aiocb
= acb
->next
;
3640 acb
= g_malloc0(pool
->aiocb_size
);
3645 acb
->opaque
= opaque
;
/* Return an AIOCB to its pool's free list so qemu_aio_get() can reuse
 * it; the memory itself is kept allocated for the pool's lifetime. */
3649 void qemu_aio_release(void *p
)
3651 BlockDriverAIOCB
*acb
= (BlockDriverAIOCB
*)p
;
3652 AIOPool
*pool
= acb
->pool
;
3653 acb
->next
= pool
->free_aiocb
;
3654 pool
->free_aiocb
= acb
;
3657 /**************************************************************/
3658 /* Coroutine block device emulation */
3660 typedef struct CoroutineIOCompletion
{
3661 Coroutine
*coroutine
;
3663 } CoroutineIOCompletion
;
3665 static void bdrv_co_io_em_complete(void *opaque
, int ret
)
3667 CoroutineIOCompletion
*co
= opaque
;
3670 qemu_coroutine_enter(co
->coroutine
, NULL
);
3673 static int coroutine_fn
bdrv_co_io_em(BlockDriverState
*bs
, int64_t sector_num
,
3674 int nb_sectors
, QEMUIOVector
*iov
,
3677 CoroutineIOCompletion co
= {
3678 .coroutine
= qemu_coroutine_self(),
3680 BlockDriverAIOCB
*acb
;
3683 acb
= bs
->drv
->bdrv_aio_writev(bs
, sector_num
, iov
, nb_sectors
,
3684 bdrv_co_io_em_complete
, &co
);
3686 acb
= bs
->drv
->bdrv_aio_readv(bs
, sector_num
, iov
, nb_sectors
,
3687 bdrv_co_io_em_complete
, &co
);
3690 trace_bdrv_co_io_em(bs
, sector_num
, nb_sectors
, is_write
, acb
);
3694 qemu_coroutine_yield();
3699 static int coroutine_fn
bdrv_co_readv_em(BlockDriverState
*bs
,
3700 int64_t sector_num
, int nb_sectors
,
3703 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, false);
3706 static int coroutine_fn
bdrv_co_writev_em(BlockDriverState
*bs
,
3707 int64_t sector_num
, int nb_sectors
,
3710 return bdrv_co_io_em(bs
, sector_num
, nb_sectors
, iov
, true);
3713 static void coroutine_fn
bdrv_flush_co_entry(void *opaque
)
3715 RwCo
*rwco
= opaque
;
3717 rwco
->ret
= bdrv_co_flush(rwco
->bs
);
3720 int coroutine_fn
bdrv_co_flush(BlockDriverState
*bs
)
3724 if (!bs
|| !bdrv_is_inserted(bs
) || bdrv_is_read_only(bs
)) {
3728 /* Write back cached data to the OS even with cache=unsafe */
3729 if (bs
->drv
->bdrv_co_flush_to_os
) {
3730 ret
= bs
->drv
->bdrv_co_flush_to_os(bs
);
3736 /* But don't actually force it to the disk with cache=unsafe */
3737 if (bs
->open_flags
& BDRV_O_NO_FLUSH
) {
3741 if (bs
->drv
->bdrv_co_flush_to_disk
) {
3742 ret
= bs
->drv
->bdrv_co_flush_to_disk(bs
);
3743 } else if (bs
->drv
->bdrv_aio_flush
) {
3744 BlockDriverAIOCB
*acb
;
3745 CoroutineIOCompletion co
= {
3746 .coroutine
= qemu_coroutine_self(),
3749 acb
= bs
->drv
->bdrv_aio_flush(bs
, bdrv_co_io_em_complete
, &co
);
3753 qemu_coroutine_yield();
3758 * Some block drivers always operate in either writethrough or unsafe
3759 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
3760 * know how the server works (because the behaviour is hardcoded or
3761 * depends on server-side configuration), so we can't ensure that
3762 * everything is safe on disk. Returning an error doesn't work because
3763 * that would break guests even if the server operates in writethrough
3766 * Let's hope the user knows what he's doing.
3774 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
3775 * in the case of cache=unsafe, so there are no useless flushes.
3777 return bdrv_co_flush(bs
->file
);
/* Ask the format driver to drop any cached metadata, e.g. after incoming
 * migration when the image may have changed underneath us. Silently a
 * no-op when the driver does not implement the callback. */
3780 void bdrv_invalidate_cache(BlockDriverState
*bs
)
3782 if (bs
->drv
&& bs
->drv
->bdrv_invalidate_cache
) {
3783 bs
->drv
->bdrv_invalidate_cache(bs
);
/* Invalidate cached metadata on every registered block device
 * (walks the global bdrv_states list). */
3787 void bdrv_invalidate_cache_all(void)
3789 BlockDriverState
*bs
;
3791 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
3792 bdrv_invalidate_cache(bs
);
/* Clear the BDRV_O_INCOMING flag on every block device once incoming
 * migration has completed, re-enabling normal operation. */
3796 void bdrv_clear_incoming_migration_all(void)
3798 BlockDriverState
*bs
;
3800 QTAILQ_FOREACH(bs
, &bdrv_states
, list
) {
3801 bs
->open_flags
= bs
->open_flags
& ~(BDRV_O_INCOMING
);
3805 int bdrv_flush(BlockDriverState
*bs
)
3813 if (qemu_in_coroutine()) {
3814 /* Fast-path if already in coroutine context */
3815 bdrv_flush_co_entry(&rwco
);
3817 co
= qemu_coroutine_create(bdrv_flush_co_entry
);
3818 qemu_coroutine_enter(co
, &rwco
);
3819 while (rwco
.ret
== NOT_DONE
) {
3827 static void coroutine_fn
bdrv_discard_co_entry(void *opaque
)
3829 RwCo
*rwco
= opaque
;
3831 rwco
->ret
= bdrv_co_discard(rwco
->bs
, rwco
->sector_num
, rwco
->nb_sectors
);
3834 int coroutine_fn
bdrv_co_discard(BlockDriverState
*bs
, int64_t sector_num
,
3839 } else if (bdrv_check_request(bs
, sector_num
, nb_sectors
)) {
3841 } else if (bs
->read_only
) {
3843 } else if (bs
->drv
->bdrv_co_discard
) {
3844 return bs
->drv
->bdrv_co_discard(bs
, sector_num
, nb_sectors
);
3845 } else if (bs
->drv
->bdrv_aio_discard
) {
3846 BlockDriverAIOCB
*acb
;
3847 CoroutineIOCompletion co
= {
3848 .coroutine
= qemu_coroutine_self(),
3851 acb
= bs
->drv
->bdrv_aio_discard(bs
, sector_num
, nb_sectors
,
3852 bdrv_co_io_em_complete
, &co
);
3856 qemu_coroutine_yield();
3864 int bdrv_discard(BlockDriverState
*bs
, int64_t sector_num
, int nb_sectors
)
3869 .sector_num
= sector_num
,
3870 .nb_sectors
= nb_sectors
,
3874 if (qemu_in_coroutine()) {
3875 /* Fast-path if already in coroutine context */
3876 bdrv_discard_co_entry(&rwco
);
3878 co
= qemu_coroutine_create(bdrv_discard_co_entry
);
3879 qemu_coroutine_enter(co
, &rwco
);
3880 while (rwco
.ret
== NOT_DONE
) {
3888 /**************************************************************/
3889 /* removable device support */
3892 * Return TRUE if the media is present
3894 int bdrv_is_inserted(BlockDriverState
*bs
)
3896 BlockDriver
*drv
= bs
->drv
;
3900 if (!drv
->bdrv_is_inserted
)
3902 return drv
->bdrv_is_inserted(bs
);
3906 * Return whether the media changed since the last call to this
3907 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3909 int bdrv_media_changed(BlockDriverState
*bs
)
3911 BlockDriver
*drv
= bs
->drv
;
3913 if (drv
&& drv
->bdrv_media_changed
) {
3914 return drv
->bdrv_media_changed(bs
);
3920 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3922 void bdrv_eject(BlockDriverState
*bs
, bool eject_flag
)
3924 BlockDriver
*drv
= bs
->drv
;
3926 if (drv
&& drv
->bdrv_eject
) {
3927 drv
->bdrv_eject(bs
, eject_flag
);
3930 if (bs
->device_name
[0] != '\0') {
3931 bdrv_emit_qmp_eject_event(bs
, eject_flag
);
3936 * Lock or unlock the media (if it is locked, the user won't be able
3937 * to eject it manually).
3939 void bdrv_lock_medium(BlockDriverState
*bs
, bool locked
)
3941 BlockDriver
*drv
= bs
->drv
;
3943 trace_bdrv_lock_medium(bs
, locked
);
3945 if (drv
&& drv
->bdrv_lock_medium
) {
3946 drv
->bdrv_lock_medium(bs
, locked
);
3950 /* needed for generic scsi interface */
3952 int bdrv_ioctl(BlockDriverState
*bs
, unsigned long int req
, void *buf
)
3954 BlockDriver
*drv
= bs
->drv
;
3956 if (drv
&& drv
->bdrv_ioctl
)
3957 return drv
->bdrv_ioctl(bs
, req
, buf
);
3961 BlockDriverAIOCB
*bdrv_aio_ioctl(BlockDriverState
*bs
,
3962 unsigned long int req
, void *buf
,
3963 BlockDriverCompletionFunc
*cb
, void *opaque
)
3965 BlockDriver
*drv
= bs
->drv
;
3967 if (drv
&& drv
->bdrv_aio_ioctl
)
3968 return drv
->bdrv_aio_ioctl(bs
, req
, buf
, cb
, opaque
);
/* Record the byte alignment that I/O buffers for this device must
 * satisfy; consulted by qemu_blockalign(). */
3972 void bdrv_set_buffer_alignment(BlockDriverState
*bs
, int align
)
3974 bs
->buffer_alignment
= align
;
/* Allocate 'size' bytes suitably aligned for I/O on 'bs'; falls back to
 * a 512-byte alignment when bs is NULL or no alignment has been set. */
3977 void *qemu_blockalign(BlockDriverState
*bs
, size_t size
)
3979 return qemu_memalign((bs
&& bs
->buffer_alignment
) ? bs
->buffer_alignment
: 512, size
);
3982 void bdrv_set_dirty_tracking(BlockDriverState
*bs
, int enable
)
3984 int64_t bitmap_size
;
3986 bs
->dirty_count
= 0;
3988 if (!bs
->dirty_bitmap
) {
3989 bitmap_size
= (bdrv_getlength(bs
) >> BDRV_SECTOR_BITS
) +
3990 BDRV_SECTORS_PER_DIRTY_CHUNK
* BITS_PER_LONG
- 1;
3991 bitmap_size
/= BDRV_SECTORS_PER_DIRTY_CHUNK
* BITS_PER_LONG
;
3993 bs
->dirty_bitmap
= g_new0(unsigned long, bitmap_size
);
3996 if (bs
->dirty_bitmap
) {
3997 g_free(bs
->dirty_bitmap
);
3998 bs
->dirty_bitmap
= NULL
;
4003 int bdrv_get_dirty(BlockDriverState
*bs
, int64_t sector
)
4005 int64_t chunk
= sector
/ (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK
;
4007 if (bs
->dirty_bitmap
&&
4008 (sector
<< BDRV_SECTOR_BITS
) < bdrv_getlength(bs
)) {
4009 return !!(bs
->dirty_bitmap
[chunk
/ (sizeof(unsigned long) * 8)] &
4010 (1UL << (chunk
% (sizeof(unsigned long) * 8))));
4016 void bdrv_reset_dirty(BlockDriverState
*bs
, int64_t cur_sector
,
4019 set_dirty_bitmap(bs
, cur_sector
, nr_sectors
, 0);
/* Return the number of dirty chunks currently tracked for this device. */
4022 int64_t bdrv_get_dirty_count(BlockDriverState
*bs
)
4024 return bs
->dirty_count
;
/* Mark or unmark the device as in use (e.g. by a block job). The assert
 * catches unbalanced set/clear pairs: the flag must actually change. */
4027 void bdrv_set_in_use(BlockDriverState
*bs
, int in_use
)
4029 assert(bs
->in_use
!= in_use
);
4030 bs
->in_use
= in_use
;
4033 int bdrv_in_use(BlockDriverState
*bs
)
4038 void bdrv_iostatus_enable(BlockDriverState
*bs
)
4040 bs
->iostatus_enabled
= true;
4041 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
4044 /* The I/O status is only enabled if the drive explicitly
4045 * enables it _and_ the VM is configured to stop on errors */
4046 bool bdrv_iostatus_is_enabled(const BlockDriverState
*bs
)
4048 return (bs
->iostatus_enabled
&&
4049 (bs
->on_write_error
== BLOCK_ERR_STOP_ENOSPC
||
4050 bs
->on_write_error
== BLOCK_ERR_STOP_ANY
||
4051 bs
->on_read_error
== BLOCK_ERR_STOP_ANY
));
/* Turn off I/O status tracking for this device. */
4054 void bdrv_iostatus_disable(BlockDriverState
*bs
)
4056 bs
->iostatus_enabled
= false;
4059 void bdrv_iostatus_reset(BlockDriverState
*bs
)
4061 if (bdrv_iostatus_is_enabled(bs
)) {
4062 bs
->iostatus
= BLOCK_DEVICE_IO_STATUS_OK
;
4066 /* XXX: Today this is set by device models because it makes the implementation
4067 quite simple. However, the block layer knows about the error, so it's
4068 possible to implement this without device models being involved */
4069 void bdrv_iostatus_set_err(BlockDriverState
*bs
, int error
)
4071 if (bdrv_iostatus_is_enabled(bs
) &&
4072 bs
->iostatus
== BLOCK_DEVICE_IO_STATUS_OK
) {
4074 bs
->iostatus
= error
== ENOSPC
? BLOCK_DEVICE_IO_STATUS_NOSPACE
:
4075 BLOCK_DEVICE_IO_STATUS_FAILED
;
4080 bdrv_acct_start(BlockDriverState
*bs
, BlockAcctCookie
*cookie
, int64_t bytes
,
4081 enum BlockAcctType type
)
4083 assert(type
< BDRV_MAX_IOTYPE
);
4085 cookie
->bytes
= bytes
;
4086 cookie
->start_time_ns
= get_clock();
4087 cookie
->type
= type
;
4091 bdrv_acct_done(BlockDriverState
*bs
, BlockAcctCookie
*cookie
)
4093 assert(cookie
->type
< BDRV_MAX_IOTYPE
);
4095 bs
->nr_bytes
[cookie
->type
] += cookie
->bytes
;
4096 bs
->nr_ops
[cookie
->type
]++;
4097 bs
->total_time_ns
[cookie
->type
] += get_clock() - cookie
->start_time_ns
;
4100 int bdrv_img_create(const char *filename
, const char *fmt
,
4101 const char *base_filename
, const char *base_fmt
,
4102 char *options
, uint64_t img_size
, int flags
)
4104 QEMUOptionParameter
*param
= NULL
, *create_options
= NULL
;
4105 QEMUOptionParameter
*backing_fmt
, *backing_file
, *size
;
4106 BlockDriverState
*bs
= NULL
;
4107 BlockDriver
*drv
, *proto_drv
;
4108 BlockDriver
*backing_drv
= NULL
;
4111 /* Find driver and parse its options */
4112 drv
= bdrv_find_format(fmt
);
4114 error_report("Unknown file format '%s'", fmt
);
4119 proto_drv
= bdrv_find_protocol(filename
);
4121 error_report("Unknown protocol '%s'", filename
);
4126 create_options
= append_option_parameters(create_options
,
4127 drv
->create_options
);
4128 create_options
= append_option_parameters(create_options
,
4129 proto_drv
->create_options
);
4131 /* Create parameter list with default values */
4132 param
= parse_option_parameters("", create_options
, param
);
4134 set_option_parameter_int(param
, BLOCK_OPT_SIZE
, img_size
);
4136 /* Parse -o options */
4138 param
= parse_option_parameters(options
, create_options
, param
);
4139 if (param
== NULL
) {
4140 error_report("Invalid options for file format '%s'.", fmt
);
4146 if (base_filename
) {
4147 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FILE
,
4149 error_report("Backing file not supported for file format '%s'",
4157 if (set_option_parameter(param
, BLOCK_OPT_BACKING_FMT
, base_fmt
)) {
4158 error_report("Backing file format not supported for file "
4159 "format '%s'", fmt
);
4165 backing_file
= get_option_parameter(param
, BLOCK_OPT_BACKING_FILE
);
4166 if (backing_file
&& backing_file
->value
.s
) {
4167 if (!strcmp(filename
, backing_file
->value
.s
)) {
4168 error_report("Error: Trying to create an image with the "
4169 "same filename as the backing file");
4175 backing_fmt
= get_option_parameter(param
, BLOCK_OPT_BACKING_FMT
);
4176 if (backing_fmt
&& backing_fmt
->value
.s
) {
4177 backing_drv
= bdrv_find_format(backing_fmt
->value
.s
);
4179 error_report("Unknown backing file format '%s'",
4180 backing_fmt
->value
.s
);
4186 // The size for the image must always be specified, with one exception:
4187 // If we are using a backing file, we can obtain the size from there
4188 size
= get_option_parameter(param
, BLOCK_OPT_SIZE
);
4189 if (size
&& size
->value
.n
== -1) {
4190 if (backing_file
&& backing_file
->value
.s
) {
4195 /* backing files always opened read-only */
4197 flags
& ~(BDRV_O_RDWR
| BDRV_O_SNAPSHOT
| BDRV_O_NO_BACKING
);
4201 ret
= bdrv_open(bs
, backing_file
->value
.s
, back_flags
, backing_drv
);
4203 error_report("Could not open '%s'", backing_file
->value
.s
);
4206 bdrv_get_geometry(bs
, &size
);
4209 snprintf(buf
, sizeof(buf
), "%" PRId64
, size
);
4210 set_option_parameter(param
, BLOCK_OPT_SIZE
, buf
);
4212 error_report("Image creation needs a size parameter");
4218 printf("Formatting '%s', fmt=%s ", filename
, fmt
);
4219 print_option_parameters(param
);
4222 ret
= bdrv_create(drv
, filename
, param
);
4225 if (ret
== -ENOTSUP
) {
4226 error_report("Formatting or formatting option not supported for "
4227 "file format '%s'", fmt
);
4228 } else if (ret
== -EFBIG
) {
4229 error_report("The image size is too large for file format '%s'",
4232 error_report("%s: error while creating %s: %s", filename
, fmt
,
4238 free_option_parameters(create_options
);
4239 free_option_parameters(param
);
4248 void *block_job_create(const BlockJobType
*job_type
, BlockDriverState
*bs
,
4249 int64_t speed
, BlockDriverCompletionFunc
*cb
,
4250 void *opaque
, Error
**errp
)
4254 if (bs
->job
|| bdrv_in_use(bs
)) {
4255 error_set(errp
, QERR_DEVICE_IN_USE
, bdrv_get_device_name(bs
));
4258 bdrv_set_in_use(bs
, 1);
4260 job
= g_malloc0(job_type
->instance_size
);
4261 job
->job_type
= job_type
;
4264 job
->opaque
= opaque
;
4268 /* Only set speed when necessary to avoid NotSupported error */
4270 Error
*local_err
= NULL
;
4272 block_job_set_speed(job
, speed
, &local_err
);
4273 if (error_is_set(&local_err
)) {
4276 bdrv_set_in_use(bs
, 0);
4277 error_propagate(errp
, local_err
);
4284 void block_job_complete(BlockJob
*job
, int ret
)
4286 BlockDriverState
*bs
= job
->bs
;
4288 assert(bs
->job
== job
);
4289 job
->cb(job
->opaque
, ret
);
4292 bdrv_set_in_use(bs
, 0);
4295 void block_job_set_speed(BlockJob
*job
, int64_t speed
, Error
**errp
)
4297 Error
*local_err
= NULL
;
4299 if (!job
->job_type
->set_speed
) {
4300 error_set(errp
, QERR_NOT_SUPPORTED
);
4303 job
->job_type
->set_speed(job
, speed
, &local_err
);
4304 if (error_is_set(&local_err
)) {
4305 error_propagate(errp
, local_err
);
4312 void block_job_cancel(BlockJob
*job
)
4314 job
->cancelled
= true;
4315 if (job
->co
&& !job
->busy
) {
4316 qemu_coroutine_enter(job
->co
, NULL
);
4320 bool block_job_is_cancelled(BlockJob
*job
)
4322 return job
->cancelled
;
4325 struct BlockCancelData
{
4327 BlockDriverCompletionFunc
*cb
;
4333 static void block_job_cancel_cb(void *opaque
, int ret
)
4335 struct BlockCancelData
*data
= opaque
;
4337 data
->cancelled
= block_job_is_cancelled(data
->job
);
4339 data
->cb(data
->opaque
, ret
);
4342 int block_job_cancel_sync(BlockJob
*job
)
4344 struct BlockCancelData data
;
4345 BlockDriverState
*bs
= job
->bs
;
4347 assert(bs
->job
== job
);
4349 /* Set up our own callback to store the result and chain to
4350 * the original callback.
4354 data
.opaque
= job
->opaque
;
4355 data
.ret
= -EINPROGRESS
;
4356 job
->cb
= block_job_cancel_cb
;
4357 job
->opaque
= &data
;
4358 block_job_cancel(job
);
4359 while (data
.ret
== -EINPROGRESS
) {
4362 return (data
.cancelled
&& data
.ret
== 0) ? -ECANCELED
: data
.ret
;
4365 void block_job_sleep_ns(BlockJob
*job
, QEMUClock
*clock
, int64_t ns
)
4367 /* Check cancellation *before* setting busy = false, too! */
4368 if (!block_job_is_cancelled(job
)) {
4370 co_sleep_ns(clock
, ns
);