/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "monitor.h"
#include "block_int.h"
#include "qjson.h"
#include "qemu-coroutine.h"
#include "qmp-commands.h"
#include "qemu-timer.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#endif

#ifdef _WIN32
#include <windows.h>
#endif
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

typedef enum {
    BDRV_REQ_COPY_ON_READ = 0x1,
    BDRV_REQ_ZERO_WRITE   = 0x2,
} BdrvRequestFlags;
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors);

static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
        double elapsed_time, uint64_t *wait);
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
        bool is_write, int64_t *wait);

static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* The device to use for VM snapshots */
static BlockDriverState *bs_snapshots;

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
/* throttling disk I/O limits */
void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    while (qemu_co_queue_next(&bs->throttled_reqs));

    if (bs->block_timer) {
        qemu_del_timer(bs->block_timer);
        qemu_free_timer(bs->block_timer);
        bs->block_timer = NULL;
    }

    bs->slice_start = 0;
    bs->slice_end   = 0;
    bs->slice_time  = 0;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
}
static void bdrv_block_timer(void *opaque)
{
    BlockDriverState *bs = opaque;

    qemu_co_queue_next(&bs->throttled_reqs);
}

void bdrv_io_limits_enable(BlockDriverState *bs)
{
    qemu_co_queue_init(&bs->throttled_reqs);
    bs->block_timer = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
    bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
    bs->slice_start = qemu_get_clock_ns(vm_clock);
    bs->slice_end   = bs->slice_start + bs->slice_time;
    memset(&bs->io_base, 0, sizeof(bs->io_base));
    bs->io_limits_enabled = true;
}
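/* Illustration, not part of the original code: assuming BLOCK_IO_SLICE_TIME
 * is 100 ms (its block_int.h value at this time), each throttling slice spans
 * 5 * 100 ms = 500 ms of vm_clock time. Requests are accounted against
 * io_base within [slice_start, slice_end), and bdrv_exceed_io_limits()
 * advances the window once it expires. */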
bool bdrv_io_limits_enabled(BlockDriverState *bs)
{
    BlockIOLimit *io_limits = &bs->io_limits;
    return io_limits->bps[BLOCK_IO_LIMIT_READ]
         || io_limits->bps[BLOCK_IO_LIMIT_WRITE]
         || io_limits->bps[BLOCK_IO_LIMIT_TOTAL]
         || io_limits->iops[BLOCK_IO_LIMIT_READ]
         || io_limits->iops[BLOCK_IO_LIMIT_WRITE]
         || io_limits->iops[BLOCK_IO_LIMIT_TOTAL];
}
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     bool is_write, int nb_sectors)
{
    int64_t wait_time = -1;

    if (!qemu_co_queue_empty(&bs->throttled_reqs)) {
        qemu_co_queue_wait(&bs->throttled_reqs);
    }

    /* In fact, we hope to keep each request's timing, in FIFO mode. The next
     * throttled requests will not be dequeued until the current request is
     * allowed to be serviced. So if the current request still exceeds the
     * limits, it will be inserted at the head. All requests following it will
     * still be in the throttled_reqs queue.
     */

    while (bdrv_exceed_io_limits(bs, nb_sectors, is_write, &wait_time)) {
        qemu_mod_timer(bs->block_timer,
                       wait_time + qemu_get_clock_ns(vm_clock));
        qemu_co_queue_wait_insert_head(&bs->throttled_reqs);
    }

    qemu_co_queue_next(&bs->throttled_reqs);
}
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}

int path_is_absolute(const char *path)
{
    const char *p;
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\')
        return 1;
#endif
    p = strchr(path, ':');
    if (p)
        p++;
    else
        p = path;
#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
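/* Example (illustrative): path_combine(dest, sizeof(dest),
 * "/images/base.qcow2", "snap.qcow2") yields "/images/snap.qcow2", while an
 * absolute filename such as "/other/snap.qcow2" is copied to dest unchanged. */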
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}
BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}

static int bdrv_is_whitelisted(BlockDriver *drv)
{
    static const char *whitelist[] = {
        CONFIG_BDRV_WHITELIST
    };
    const char **p;

    if (!whitelist[0])
        return 1;               /* no whitelist, anything goes */

    for (p = whitelist; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
}
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QEMUOptionParameter *options;
    int ret;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    CreateCo *cco = opaque;
    assert(cco->drv);

    cco->ret = cco->drv->bdrv_create(cco->filename, cco->options);
}

int bdrv_create(BlockDriver *drv, const char* filename,
    QEMUOptionParameter *options)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .options = options,
        .ret = NOT_DONE,
    };

    if (!drv->bdrv_create) {
        return -ENOTSUP;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    ret = cco.ret;
    g_free(cco.filename);

    return ret;
}

int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
{
    BlockDriver *drv;

    drv = bdrv_find_protocol(filename);
    if (drv == NULL) {
        return -ENOENT;
    }

    return bdrv_create(drv, filename, options);
}
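/* Sketch of a typical caller (hypothetical usage; it relies only on option
 * helpers that also appear in bdrv_open() below):
 *
 *     BlockDriver *drv = bdrv_find_format("qcow2");
 *     QEMUOptionParameter *opts =
 *         parse_option_parameters("", drv->create_options, NULL);
 *     set_option_parameter_int(opts, BLOCK_OPT_SIZE, 1 << 30);
 *     ret = bdrv_create(drv, "test.qcow2", opts);
 *     free_option_parameters(opts);
 */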
#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    close(fd);
}
#endif
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}
BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
/**
 * Set the current 'total_sectors' value
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = length >> BDRV_SECTOR_BITS;
    }

    bs->total_sectors = hint;
    return 0;
}
/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
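/* Summary of the mapping above: "none"/"off" -> NOCACHE|CACHE_WB (O_DIRECT
 * with writeback semantics), "directsync" -> NOCACHE alone, "writeback" ->
 * CACHE_WB, "unsafe" -> CACHE_WB|NO_FLUSH (flushes become no-ops), and
 * "writethrough" leaves the cache mask cleared. */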
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}
/*
 * Common part for opening disk images and files
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);
    assert(bs->file == NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    bs->total_sectors = 0;
    bs->open_flags = flags;
    bs->buffer_alignment = 512;

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if ((flags & BDRV_O_RDWR) && (flags & BDRV_O_COPY_ON_READ)) {
        bdrv_enable_copy_on_read(bs);
    }

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
/*
 * Opens a file using a protocol (file, host_device, nbd, ...)
 */
int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
{
    BlockDriverState *bs;
    BlockDriver *drv;
    int ret;

    drv = bdrv_find_protocol(filename);
    if (!drv) {
        return -ENOENT;
    }

    bs = bdrv_new("");
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        bdrv_delete(bs);
        return ret;
    }
    bs->growable = 1;
    *pbs = bs;
    return 0;
}
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* if there is a backing file, use it */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_enable(bs);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
void bdrv_close(BlockDriverState *bs)
{
    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }

    if (bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
    if (bs->drv) {
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }
}
void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}
/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 */
void bdrv_drain_all(void)
{
    BlockDriverState *bs;

    qemu_aio_flush();

    /* If requests are still pending there is a bug somewhere */
    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        assert(QLIST_EMPTY(&bs->tracked_requests));
        assert(qemu_co_queue_empty(&bs->throttled_reqs));
    }
}
/* make a BlockDriverState anonymous by removing from bdrv_state list.
   Also, NULL terminate the device_name to prevent double remove */
void bdrv_make_anon(BlockDriverState *bs)
{
    if (bs->device_name[0] != '\0') {
        QTAILQ_REMOVE(&bdrv_states, bs, list);
    }
    bs->device_name[0] = '\0';
}
/*
 * Add new bs contents at the top of an image chain while the chain is
 * live, while keeping required fields on the top layer.
 *
 * This will modify the BlockDriverState fields, and swap contents
 * between bs_new and bs_top. Both bs_new and bs_top are modified.
 *
 * bs_new is required to be anonymous.
 *
 * This function does not create any image files.
 */
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
    BlockDriverState tmp;

    /* bs_new must be anonymous */
    assert(bs_new->device_name[0] == '\0');

    tmp = *bs_new;

    /* there are some fields that need to stay on the top layer: */

    /* dev info */
    tmp.dev_ops           = bs_top->dev_ops;
    tmp.dev_opaque        = bs_top->dev_opaque;
    tmp.dev               = bs_top->dev;
    tmp.buffer_alignment  = bs_top->buffer_alignment;
    tmp.copy_on_read      = bs_top->copy_on_read;

    /* i/o timing parameters */
    tmp.slice_time        = bs_top->slice_time;
    tmp.slice_start       = bs_top->slice_start;
    tmp.slice_end         = bs_top->slice_end;
    tmp.io_limits         = bs_top->io_limits;
    tmp.io_base           = bs_top->io_base;
    tmp.throttled_reqs    = bs_top->throttled_reqs;
    tmp.block_timer       = bs_top->block_timer;
    tmp.io_limits_enabled = bs_top->io_limits_enabled;

    /* geometry */
    tmp.cyls              = bs_top->cyls;
    tmp.heads             = bs_top->heads;
    tmp.secs              = bs_top->secs;
    tmp.translation       = bs_top->translation;

    /* r/w error */
    tmp.on_read_error     = bs_top->on_read_error;
    tmp.on_write_error    = bs_top->on_write_error;

    /* i/o status */
    tmp.iostatus_enabled  = bs_top->iostatus_enabled;
    tmp.iostatus          = bs_top->iostatus;

    /* keep the same entry in bdrv_states */
    pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
    tmp.list = bs_top->list;

    /* The contents of 'tmp' will become bs_top, as we are
     * swapping bs_new and bs_top contents. */
    tmp.backing_hd = bs_new;
    pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
    bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));

    /* swap contents of the fixed new bs and the current top */
    *bs_new = *bs_top;
    *bs_top = tmp;

    /* device_name[] was carried over from the old bs_top. bs_new
     * shouldn't be in bdrv_states, so we need to make device_name[]
     * reflect the anonymity of bs_new
     */
    bs_new->device_name[0] = '\0';

    /* clear the copied fields in the new backing file */
    bdrv_detach_dev(bs_new, bs_new->dev);

    qemu_co_queue_init(&bs_new->throttled_reqs);
    memset(&bs_new->io_base,   0, sizeof(bs_new->io_base));
    memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
    bdrv_iostatus_disable(bs_new);

    /* we don't use bdrv_io_limits_disable() for this, because we don't want
     * to affect or delete the block_timer, as it has been moved to bs_top */
    bs_new->io_limits_enabled = false;
    bs_new->block_timer       = NULL;
    bs_new->slice_time        = 0;
    bs_new->slice_start       = 0;
    bs_new->slice_end         = 0;
}
void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);
    assert(!bs->job);
    assert(!bs->in_use);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}
int bdrv_attach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    if (bs->dev) {
        return -EBUSY;
    }
    bs->dev = dev;
    bdrv_iostatus_reset(bs);
    return 0;
}

/* TODO qdevified devices don't use this, remove when devices are qdevified */
void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
{
    if (bdrv_attach_dev(bs, dev) < 0) {
        abort();
    }
}

void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    bs->buffer_alignment = 512;
}

/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}

void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
                               BlockQMPEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}

static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected)
{
    QObject *data;

    data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }",
                              bdrv_get_device_name(bs), ejected);
    monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data);

    qobject_decref(data);
}
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
{
    if (bs->dev_ops && bs->dev_ops->change_media_cb) {
        bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
        bs->dev_ops->change_media_cb(bs->dev_opaque, load);
        if (tray_was_closed) {
            /* tray open */
            bdrv_emit_qmp_eject_event(bs, true);
        }
        if (load) {
            /* tray close */
            bdrv_emit_qmp_eject_event(bs, false);
        }
    }
}

bool bdrv_dev_has_removable_media(BlockDriverState *bs)
{
    return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
}

void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
{
    if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
        bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
    }
}

bool bdrv_dev_is_tray_open(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_tray_open) {
        return bs->dev_ops->is_tray_open(bs->dev_opaque);
    }
    return false;
}

static void bdrv_dev_resize_cb(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->resize_cb) {
        bs->dev_ops->resize_cb(bs->dev_opaque);
    }
}

bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
{
    if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
        return bs->dev_ops->is_medium_locked(bs->dev_opaque);
    }
    return false;
}
/*
 * Run consistency checks on an image
 *
 * Returns 0 if the check could be completed (it doesn't mean that the image is
 * free of errors) or -errno when an internal error occurred. The results of
 * the check are stored in res.
 */
int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
{
    if (bs->drv->bdrv_check == NULL) {
        return -ENOTSUP;
    }

    memset(res, 0, sizeof(*res));
    return bs->drv->bdrv_check(bs, res);
}
#define COMMIT_BUF_SECTORS 2048

/* commit COW file into the raw image */
int bdrv_commit(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    BlockDriver *backing_drv;
    int64_t sector, total_sectors;
    int n, ro, open_flags;
    int ret = 0, rw_ret = 0;
    uint8_t *buf;
    char filename[1024];
    BlockDriverState *bs_rw, *bs_ro;

    if (!drv)
        return -ENOMEDIUM;

    if (!bs->backing_hd) {
        return -ENOTSUP;
    }

    if (bs->backing_hd->keep_read_only) {
        return -EACCES;
    }

    if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
        return -EBUSY;
    }

    backing_drv = bs->backing_hd->drv;
    ro = bs->backing_hd->read_only;
    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
    open_flags = bs->backing_hd->open_flags;

    if (ro) {
        /* re-open as RW */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_rw = bdrv_new("");
        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
            backing_drv);
        if (rw_ret < 0) {
            bdrv_delete(bs_rw);
            /* try to re-open read-only */
            bs_ro = bdrv_new("");
            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
                backing_drv);
            if (ret < 0) {
                bdrv_delete(bs_ro);
                /* drive not functional anymore */
                bs->drv = NULL;
                return ret;
            }
            bs->backing_hd = bs_ro;
            return rw_ret;
        }
        bs->backing_hd = bs_rw;
    }

    total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
    buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);

    for (sector = 0; sector < total_sectors; sector += n) {
        if (bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {

            if (bdrv_read(bs, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }

            if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
                ret = -EIO;
                goto ro_cleanup;
            }
        }
    }

    if (drv->bdrv_make_empty) {
        ret = drv->bdrv_make_empty(bs);
        bdrv_flush(bs);
    }

    /*
     * Make sure all data we wrote to the backing device is actually
     * stable on disk.
     */
    if (bs->backing_hd)
        bdrv_flush(bs->backing_hd);

ro_cleanup:
    g_free(buf);

    if (ro) {
        /* re-open as RO */
        bdrv_delete(bs->backing_hd);
        bs->backing_hd = NULL;
        bs_ro = bdrv_new("");
        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
            backing_drv);
        if (ret < 0) {
            bdrv_delete(bs_ro);
            /* drive not functional anymore */
            bs->drv = NULL;
            return ret;
        }
        bs->backing_hd = bs_ro;
        bs->backing_hd->keep_read_only = 0;
    }

    return ret;
}
int bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        int ret = bdrv_commit(bs);
        if (ret < 0) {
            return ret;
        }
    }
    return 0;
}
struct BdrvTrackedRequest {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    bool is_write;
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
};

/**
 * Remove an active request from the tracked requests list
 *
 * This function should be called when a tracked request is completing.
 */
static void tracked_request_end(BdrvTrackedRequest *req)
{
    QLIST_REMOVE(req, list);
    qemu_co_queue_restart_all(&req->wait_queue);
}

/**
 * Add an active request to the tracked requests list
 */
static void tracked_request_begin(BdrvTrackedRequest *req,
                                  BlockDriverState *bs,
                                  int64_t sector_num,
                                  int nb_sectors, bool is_write)
{
    *req = (BdrvTrackedRequest){
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .is_write = is_write,
        .co = qemu_coroutine_self(),
    };

    qemu_co_queue_init(&req->wait_queue);

    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
}
/**
 * Round a region to cluster boundaries
 */
static void round_to_clusters(BlockDriverState *bs,
                              int64_t sector_num, int nb_sectors,
                              int64_t *cluster_sector_num,
                              int *cluster_nb_sectors)
{
    BlockDriverInfo bdi;

    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
        *cluster_sector_num = sector_num;
        *cluster_nb_sectors = nb_sectors;
    } else {
        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
                                            nb_sectors, c);
    }
}
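/* Worked example (illustrative): with a 64 KiB cluster size, c = 65536 / 512
 * = 128 sectors. A request for sectors [300, 310) rounds to
 * cluster_sector_num = QEMU_ALIGN_DOWN(300, 128) = 256 and
 * cluster_nb_sectors = QEMU_ALIGN_UP(300 - 256 + 10, 128) = 128,
 * i.e. the cluster-aligned region [256, 384). */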
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
                                     int64_t sector_num, int nb_sectors) {
    /*        aaaa   bbbb */
    if (sector_num >= req->sector_num + req->nb_sectors) {
        return false;
    }
    /* bbbb   aaaa        */
    if (req->sector_num >= sector_num + nb_sectors) {
        return false;
    }
    return true;
}
static void coroutine_fn wait_for_overlapping_requests(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors)
{
    BdrvTrackedRequest *req;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    bool retry;

    /* If we touch the same cluster it counts as an overlap. This guarantees
     * that allocating writes will be serialized and not race with each other
     * for the same cluster. For example, in copy-on-read it ensures that the
     * CoR read and write operations are atomic and guest writes cannot
     * interleave between them.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    do {
        retry = false;
        QLIST_FOREACH(req, &bs->tracked_requests, list) {
            if (tracked_request_overlaps(req, cluster_sector_num,
                                         cluster_nb_sectors)) {
                /* Hitting this means there was a reentrant request, for
                 * example, a block driver issuing nested requests. This must
                 * never happen since it means deadlock.
                 */
                assert(qemu_coroutine_self() != req->co);

                qemu_co_queue_wait(&req->wait_queue);
                retry = true;
                break;
            }
        }
    } while (retry);
}
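/* Illustration, not part of the original code: if a copy-on-read request is
 * populating cluster-aligned sectors [256, 384) and a guest write for sectors
 * [300, 310) arrives, the write rounds to the same cluster, finds the tracked
 * CoR request, and blocks on its wait_queue until tracked_request_end()
 * restarts it. */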
/*
 * Return values:
 * 0        - success
 * -EINVAL  - backing format specified, but no file
 * -ENOSPC  - can't update the backing file because no space is left in the
 *            image file header
 * -ENOTSUP - format driver doesn't support changing the backing file
 */
int bdrv_change_backing_file(BlockDriverState *bs,
    const char *backing_file, const char *backing_fmt)
{
    BlockDriver *drv = bs->drv;

    if (drv->bdrv_change_backing_file != NULL) {
        return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
    } else {
        return -ENOTSUP;
    }
}
static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                   size_t size)
{
    int64_t len;

    if (!bdrv_is_inserted(bs))
        return -ENOMEDIUM;

    if (bs->growable)
        return 0;

    len = bdrv_getlength(bs);

    if (offset < 0)
        return -EIO;

    if ((offset > len) || (len - offset < size))
        return -EIO;

    return 0;
}

static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
                              int nb_sectors)
{
    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
                                   nb_sectors * BDRV_SECTOR_SIZE);
}
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;
    int ret;
} RwCo;

static void coroutine_fn bdrv_rw_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    if (!rwco->is_write) {
        rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
                                     rwco->nb_sectors, rwco->qiov, 0);
    } else {
        rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
                                      rwco->nb_sectors, rwco->qiov, 0);
    }
}

/*
 * Process a synchronous request using coroutines
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    /**
     * In sync call context, when the vcpu is blocked, this throttling timer
     * will not fire; so the I/O throttling function has to be disabled here
     * if it has been enabled.
     */
    if (bs->io_limits_enabled) {
        fprintf(stderr, "Disabling I/O throttling on '%s' due "
                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
        bdrv_io_limits_disable(bs);
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
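/* Usage note (illustrative): bdrv_read() and bdrv_write() below funnel
 * through this helper. When invoked outside coroutine context, the caller
 * spins in qemu_aio_wait() until bdrv_rw_co_entry() replaces the NOT_DONE
 * sentinel in rwco.ret with the real return value. */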
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
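/* Worked example (illustrative): bdrv_pread(bs, 1000, buf, 2000) splits into
 * a head read of sector 1 (copying 24 bytes from in-sector offset 488), a
 * body read of sectors 2-4 (1536 bytes "in place"), and a tail read of
 * sector 5 (copying the remaining 440 bytes). */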
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
/*
 * Writes to the file and ensures that no writes are reordered across this
 * request (acts as a barrier)
 *
 * Returns 0 on success, -errno in error cases.
 */
int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
    const void *buf, int count)
{
    int ret;

    ret = bdrv_pwrite(bs, offset, buf, count);
    if (ret < 0) {
        return ret;
    }

    /* No flush needed for cache modes that use O_DSYNC */
    if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
        bdrv_flush(bs);
    }

    return 0;
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    /* Perform I/O through a temporary buffer so that users who scribble over
     * their read buffer while the operation is in progress do not end up
     * modifying the image file. This is critical for zero-copy guest I/O
     * where anything might happen inside guest memory.
     */
    void *bounce_buffer;

    BlockDriver *drv = bs->drv;
    struct iovec iov;
    QEMUIOVector bounce_qiov;
    int64_t cluster_sector_num;
    int cluster_nb_sectors;
    size_t skip_bytes;
    int ret;

    /* Cover entire cluster so no additional backing file I/O is required when
     * allocating cluster in the image file.
     */
    round_to_clusters(bs, sector_num, nb_sectors,
                      &cluster_sector_num, &cluster_nb_sectors);

    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
                                   cluster_sector_num, cluster_nb_sectors);

    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
    qemu_iovec_init_external(&bounce_qiov, &iov, 1);

    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
                             &bounce_qiov);
    if (ret < 0) {
        goto err;
    }

    if (drv->bdrv_co_write_zeroes &&
        buffer_is_zero(bounce_buffer, iov.iov_len)) {
        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
                                      cluster_nb_sectors);
    } else {
        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
                                  &bounce_qiov);
    }

    if (ret < 0) {
        /* It might be okay to ignore write errors for guest requests. If this
         * is a deliberate copy-on-read then we don't want to ignore the error.
         * Simply report it in all cases.
         */
        goto err;
    }

    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
    qemu_iovec_from_buffer(qiov, bounce_buffer + skip_bytes,
                           nb_sectors * BDRV_SECTOR_SIZE);

err:
    qemu_vfree(bounce_buffer);
    return ret;
}
/*
 * Handle a read request in coroutine context
 */
static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!drv) {
        return -ENOMEDIUM;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk read I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, false, nb_sectors);
    }

    if (bs->copy_on_read) {
        flags |= BDRV_REQ_COPY_ON_READ;
    }
    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight++;
    }

    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, false);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        int pnum;

        ret = bdrv_co_is_allocated(bs, sector_num, nb_sectors, &pnum);
        if (ret < 0) {
            goto out;
        }

        if (!ret || pnum != nb_sectors) {
            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
            goto out;
        }
    }

    ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);

out:
    tracked_request_end(&req);

    if (flags & BDRV_REQ_COPY_ON_READ) {
        bs->copy_on_read_in_flight--;
    }

    return ret;
}
int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}

int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);

    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
                            BDRV_REQ_COPY_ON_READ);
}
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    QEMUIOVector qiov;
    struct iovec iov;
    int ret;

    /* TODO Emulate only part of misaligned requests instead of letting block
     * drivers return -ENOTSUP and emulate everything */

    /* First try the efficient write zeroes operation */
    if (drv->bdrv_co_write_zeroes) {
        ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
        if (ret != -ENOTSUP) {
            return ret;
        }
    }

    /* Fall back to bounce buffer if write zeroes is unsupported */
    iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE;
    iov.iov_base = qemu_blockalign(bs, iov.iov_len);
    memset(iov.iov_base, 0, iov.iov_len);
    qemu_iovec_init_external(&qiov, &iov, 1);

    ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);

    qemu_vfree(iov.iov_base);
    return ret;
}
/*
 * Handle a write request in coroutine context
 */
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
    BdrvRequestFlags flags)
{
    BlockDriver *drv = bs->drv;
    BdrvTrackedRequest req;
    int ret;

    if (!bs->drv) {
        return -ENOMEDIUM;
    }
    if (bs->read_only) {
        return -EACCES;
    }
    if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    }

    /* throttling disk write I/O */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_intercept(bs, true, nb_sectors);
    }

    if (bs->copy_on_read_in_flight) {
        wait_for_overlapping_requests(bs, sector_num, nb_sectors);
    }

    tracked_request_begin(&req, bs, sector_num, nb_sectors, true);

    if (flags & BDRV_REQ_ZERO_WRITE) {
        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
    } else {
        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
    }

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
        bs->wr_highest_sector = sector_num + nb_sectors - 1;
    }

    tracked_request_end(&req);

    return ret;
}
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
    int nb_sectors, QEMUIOVector *qiov)
{
    trace_bdrv_co_writev(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
}

int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
                                      int64_t sector_num, int nb_sectors)
{
    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);

    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
                             BDRV_REQ_ZERO_WRITE);
}
/**
 * Truncate file to 'offset' bytes (needed only for file protocols)
 */
int bdrv_truncate(BlockDriverState *bs, int64_t offset)
{
    BlockDriver *drv = bs->drv;
    int ret;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_truncate)
        return -ENOTSUP;
    if (bs->read_only)
        return -EACCES;
    if (bdrv_in_use(bs))
        return -EBUSY;
    ret = drv->bdrv_truncate(bs, offset);
    if (ret == 0) {
        ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
        bdrv_dev_resize_cb(bs);
    }
    return ret;
}
/**
 * Length of an allocated file in bytes. Sparse files are counted by actual
 * allocated space. Return < 0 if error or unknown.
 */
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (drv->bdrv_get_allocated_file_size) {
        return drv->bdrv_get_allocated_file_size(bs);
    }
    if (bs->file) {
        return bdrv_get_allocated_file_size(bs->file);
    }
    return -ENOTSUP;
}
/**
 * Length of a file in bytes. Return < 0 if error or unknown.
 */
int64_t bdrv_getlength(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;

    if (bs->growable || bdrv_dev_has_removable_media(bs)) {
        if (drv->bdrv_getlength) {
            return drv->bdrv_getlength(bs);
        }
    }
    return bs->total_sectors * BDRV_SECTOR_SIZE;
}
/* return 0 as number of sectors if no device present or error */
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
{
    int64_t length;
    length = bdrv_getlength(bs);
    if (length < 0)
        length = 0;
    else
        length = length >> BDRV_SECTOR_BITS;
    *nb_sectors_ptr = length;
}
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;
    bool enabled;

    bdrv_get_geometry(bs, &nb_sectors);

    /**
     * The function will be invoked during startup not only in sync I/O mode,
     * but also in async I/O mode. So the I/O throttling function has to
     * be disabled temporarily here, not permanently.
     */
    enabled = bs->io_limits_enabled;
    bs->io_limits_enabled = false;
    ret = bdrv_read(bs, 0, buf, 1);
    bs->io_limits_enabled = enabled;
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            sectors = p->end_sector & 63;
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
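/* Worked example (illustrative): a partition entry with end_head = 15 and
 * (end_sector & 63) = 63 on a 1032192-sector disk gives heads = 16,
 * sectors = 63, cylinders = 1032192 / (16 * 63) = 1024 -- the classic
 * 1024/16/63 logical geometry. */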
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
void bdrv_set_geometry_hint(BlockDriverState *bs,
                            int cyls, int heads, int secs)
{
    bs->cyls = cyls;
    bs->heads = heads;
    bs->secs = secs;
}

void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
{
    bs->translation = translation;
}

void bdrv_get_geometry_hint(BlockDriverState *bs,
                            int *pcyls, int *pheads, int *psecs)
{
    *pcyls = bs->cyls;
    *pheads = bs->heads;
    *psecs = bs->secs;
}
/* throttling disk io limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        BlockIOLimit *io_limits)
{
    bs->io_limits = *io_limits;
    bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
}
/* Recognize floppy formats */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;
    uint8_t max_track;
    uint8_t max_head;
    FDriveRate rate;
} FDFormat;

static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 20, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 21, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 21, 82, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 21, 83, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 22, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 23, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_144, 24, 80, 1, FDRIVE_RATE_500K, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 39, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 40, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 44, 80, 1, FDRIVE_RATE_1M, },
    { FDRIVE_DRV_288, 48, 80, 1, FDRIVE_RATE_1M, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 10, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 10, 82, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 10, 83, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 13, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_144, 14, 80, 1, FDRIVE_RATE_250K, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 18, 80, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 18, 82, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 18, 83, 1, FDRIVE_RATE_500K, },
    { FDRIVE_DRV_120, 20, 80, 1, FDRIVE_RATE_500K, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_120, 11, 80, 1, FDRIVE_RATE_250K, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, FDRIVE_RATE_300K, },
    { FDRIVE_DRV_120,  9, 40, 0, FDRIVE_RATE_300K, },
    { FDRIVE_DRV_120, 10, 41, 1, FDRIVE_RATE_300K, },
    { FDRIVE_DRV_120, 10, 42, 1, FDRIVE_RATE_300K, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, FDRIVE_RATE_250K, },
    { FDRIVE_DRV_120,  8, 40, 0, FDRIVE_RATE_250K, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, FDRIVE_RATE_250K, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, 0, },
};
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive,
                                   FDriveRate *rate)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
        *rate = FDRIVE_RATE_500K;
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
        *rate = parse->rate;
    }
}
int bdrv_get_translation_hint(BlockDriverState *bs)
{
    return bs->translation;
}

void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
                       BlockErrorAction on_write_error)
{
    bs->on_read_error = on_read_error;
    bs->on_write_error = on_write_error;
}

BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
{
    return is_read ? bs->on_read_error : bs->on_write_error;
}

int bdrv_is_read_only(BlockDriverState *bs)
{
    return bs->read_only;
}

int bdrv_is_sg(BlockDriverState *bs)
{
    return bs->sg;
}

int bdrv_enable_write_cache(BlockDriverState *bs)
{
    return bs->enable_write_cache;
}

int bdrv_is_encrypted(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return 1;
    return bs->encrypted;
}
int bdrv_key_required(BlockDriverState *bs)
{
    BlockDriverState *backing_hd = bs->backing_hd;

    if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
        return 1;
    return (bs->encrypted && !bs->valid_key);
}

int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
{
    if (!bs->drv) {
        buf[0] = '\0';
    } else {
        pstrcpy(buf, buf_size, bs->drv->format_name);
    }
}

void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
                         void *opaque)
{
    BlockDriver *drv;

    QLIST_FOREACH(drv, &bdrv_drivers, list) {
        it(opaque, drv->format_name);
    }
}

BlockDriverState *bdrv_find(const char *name)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        if (!strcmp(name, bs->device_name)) {
            return bs;
        }
    }
    return NULL;
}

BlockDriverState *bdrv_next(BlockDriverState *bs)
{
    if (!bs) {
        return QTAILQ_FIRST(&bdrv_states);
    }
    return QTAILQ_NEXT(bs, list);
}

void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        it(opaque, bs);
    }
}

const char *bdrv_get_device_name(BlockDriverState *bs)
{
    return bs->device_name;
}
void bdrv_flush_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_flush(bs);
    }
}

int bdrv_has_zero_init(BlockDriverState *bs)
{
    assert(bs->drv);

    if (bs->drv->bdrv_has_zero_init) {
        return bs->drv->bdrv_has_zero_init(bs);
    }

    return 1;
}
typedef struct BdrvCoIsAllocatedData {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    int *pnum;
    int ret;
    bool done;
} BdrvCoIsAllocatedData;

/*
 * Returns true iff the specified sector is present in the disk image. Drivers
 * not implementing the functionality are assumed to not support backing files,
 * hence all their sectors are reported as allocated.
 *
 * If 'sector_num' is beyond the end of the disk image the return value is 0
 * and 'pnum' is set to 0.
 *
 * 'pnum' is set to the number of sectors (including and immediately following
 * the specified sector) that are known to be in the same
 * allocated/unallocated state.
 *
 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
 * beyond the end of the disk image it will be clamped.
 */
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, int *pnum)
{
    int64_t n;

    if (sector_num >= bs->total_sectors) {
        *pnum = 0;
        return 0;
    }

    n = bs->total_sectors - sector_num;
    if (n < nb_sectors) {
        nb_sectors = n;
    }

    if (!bs->drv->bdrv_co_is_allocated) {
        *pnum = nb_sectors;
        return 1;
    }

    return bs->drv->bdrv_co_is_allocated(bs, sector_num, nb_sectors, pnum);
}
/* Coroutine wrapper for bdrv_is_allocated() */
static void coroutine_fn bdrv_is_allocated_co_entry(void *opaque)
{
    BdrvCoIsAllocatedData *data = opaque;
    BlockDriverState *bs = data->bs;

    data->ret = bdrv_co_is_allocated(bs, data->sector_num, data->nb_sectors,
                                     data->pnum);
    data->done = true;
}

/*
 * Synchronous wrapper around bdrv_co_is_allocated().
 *
 * See bdrv_co_is_allocated() for details.
 */
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                      int *pnum)
{
    Coroutine *co;
    BdrvCoIsAllocatedData data = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .pnum = pnum,
        .done = false,
    };

    co = qemu_coroutine_create(bdrv_is_allocated_co_entry);
    qemu_coroutine_enter(co, &data);
    while (!data.done) {
        qemu_aio_wait();
    }
    return data.ret;
}
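/* Illustration, not part of the original code: for a freshly created qcow2
 * image with a single written cluster at the start, bdrv_is_allocated(bs, 0,
 * 1024, &pnum) would return 1 with pnum covering only the sectors backed by
 * that cluster; a call starting past it would return 0 for the unallocated
 * run. */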
BlockInfoList *qmp_query_block(Error **errp)
{
    BlockInfoList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockInfoList *info = g_malloc0(sizeof(*info));

        info->value = g_malloc0(sizeof(*info->value));
        info->value->device = g_strdup(bs->device_name);
        info->value->type = g_strdup("unknown");
        info->value->locked = bdrv_dev_is_medium_locked(bs);
        info->value->removable = bdrv_dev_has_removable_media(bs);

        if (bdrv_dev_has_removable_media(bs)) {
            info->value->has_tray_open = true;
            info->value->tray_open = bdrv_dev_is_tray_open(bs);
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            info->value->has_io_status = true;
            info->value->io_status = bs->iostatus;
        }

        if (bs->drv) {
            info->value->has_inserted = true;
            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
            info->value->inserted->file = g_strdup(bs->filename);
            info->value->inserted->ro = bs->read_only;
            info->value->inserted->drv = g_strdup(bs->drv->format_name);
            info->value->inserted->encrypted = bs->encrypted;
            if (bs->backing_file[0]) {
                info->value->inserted->has_backing_file = true;
                info->value->inserted->backing_file = g_strdup(bs->backing_file);
            }

            if (bs->io_limits_enabled) {
                info->value->inserted->bps =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->bps_rd =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
                info->value->inserted->bps_wr =
                               bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
                info->value->inserted->iops =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
                info->value->inserted->iops_rd =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
                info->value->inserted->iops_wr =
                               bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
            }
        }

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
/* Consider exposing this as a full fledged QMP command */
static BlockStats *qmp_query_blockstat(const BlockDriverState *bs,
                                       Error **errp)
{
    BlockStats *s;

    s = g_malloc0(sizeof(*s));

    if (bs->device_name[0]) {
        s->has_device = true;
        s->device = g_strdup(bs->device_name);
    }

    s->stats = g_malloc0(sizeof(*s->stats));
    s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
    s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
    s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
    s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
    s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
    s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
    s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
    s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
    s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];

    if (bs->file) {
        s->has_parent = true;
        s->parent = qmp_query_blockstat(bs->file, NULL);
    }

    return s;
}
BlockStatsList *qmp_query_blockstats(Error **errp)
{
    BlockStatsList *head = NULL, *cur_item = NULL;
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        BlockStatsList *info = g_malloc0(sizeof(*info));
        info->value = qmp_query_blockstat(bs, NULL);

        /* XXX: waiting for the qapi to support GSList */
        if (!cur_item) {
            head = cur_item = info;
        } else {
            cur_item->next = info;
            cur_item = info;
        }
    }

    return head;
}
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
    if (bs->backing_hd && bs->backing_hd->encrypted)
        return bs->backing_file;
    else if (bs->encrypted)
        return bs->filename;
    else
        return NULL;
}
void bdrv_get_backing_filename(BlockDriverState *bs,
                               char *filename, int filename_size)
{
    pstrcpy(filename, filename_size, bs->backing_file);
}
int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
                          const uint8_t *buf, int nb_sectors)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_write_compressed)
        return -ENOTSUP;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return -EIO;

    if (bs->dirty_bitmap) {
        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
    }

    return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (!drv->bdrv_get_info)
        return -ENOTSUP;
    memset(bdi, 0, sizeof(*bdi));
    return drv->bdrv_get_info(bs, bdi);
}
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_save_vmstate)
        return drv->bdrv_save_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_save_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
                      int64_t pos, int size)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_load_vmstate)
        return drv->bdrv_load_vmstate(bs, buf, pos, size);
    if (bs->file)
        return bdrv_load_vmstate(bs->file, buf, pos, size);
    return -ENOTSUP;
}
void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
{
    BlockDriver *drv = bs->drv;

    if (!drv || !drv->bdrv_debug_event) {
        return;
    }

    drv->bdrv_debug_event(bs, event);
}
/**************************************************************/
/* handling of snapshots */

int bdrv_can_snapshot(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;
    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    if (!drv->bdrv_snapshot_create) {
        if (bs->file != NULL) {
            return bdrv_can_snapshot(bs->file);
        }
        return 0;
    }

    return 1;
}
int bdrv_is_snapshot(BlockDriverState *bs)
{
    return !!(bs->open_flags & BDRV_O_SNAPSHOT);
}
BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    if (bs_snapshots) {
        return bs_snapshots;
    }

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}
int bdrv_snapshot_create(BlockDriverState *bs,
                         QEMUSnapshotInfo *sn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_create)
        return drv->bdrv_snapshot_create(bs, sn_info);
    if (bs->file)
        return bdrv_snapshot_create(bs->file, sn_info);
    return -ENOTSUP;
}
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_goto)
        return drv->bdrv_snapshot_goto(bs, snapshot_id);

    if (bs->file) {
        drv->bdrv_close(bs);
        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
        open_ret = drv->bdrv_open(bs, bs->open_flags);
        if (open_ret < 0) {
            bdrv_delete(bs->file);
            bs->drv = NULL;
            return open_ret;
        }
        return ret;
    }

    return -ENOTSUP;
}
int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_delete)
        return drv->bdrv_snapshot_delete(bs, snapshot_id);
    if (bs->file)
        return bdrv_snapshot_delete(bs->file, snapshot_id);
    return -ENOTSUP;
}
int bdrv_snapshot_list(BlockDriverState *bs,
                       QEMUSnapshotInfo **psn_info)
{
    BlockDriver *drv = bs->drv;
    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_list)
        return drv->bdrv_snapshot_list(bs, psn_info);
    if (bs->file)
        return bdrv_snapshot_list(bs->file, psn_info);
    return -ENOTSUP;
}
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
                           const char *snapshot_name)
{
    BlockDriver *drv = bs->drv;
    if (!drv) {
        return -ENOMEDIUM;
    }
    if (!bs->read_only) {
        return -EINVAL;
    }
    if (drv->bdrv_snapshot_load_tmp) {
        return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
    }
    return -ENOTSUP;
}
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
                                          const char *backing_file)
{
    if (!bs->drv) {
        return NULL;
    }

    if (bs->backing_hd) {
        if (strcmp(bs->backing_file, backing_file) == 0) {
            return bs->backing_hd;
        } else {
            return bdrv_find_backing_image(bs->backing_hd, backing_file);
        }
    }

    return NULL;
}
#define NB_SUFFIXES 4

char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
    } else {
        base = 1024;
        for (i = 0; i < NB_SUFFIXES; i++) {
            if (size < (10 * base)) {
                snprintf(buf, buf_size, "%0.1f%c",
                         (double)size / base,
                         suffixes[i]);
                break;
            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
                snprintf(buf, buf_size, "%" PRId64 "%c",
                         ((size + (base >> 1)) / base),
                         suffixes[i]);
                break;
            }
            base = base * 1024;
        }
    }
    return buf;
}
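/*
 * Example outputs (illustrative): 999 -> "999", 1536 -> "1.5K",
 * 1048576 -> "1.0M", and 10485760 -> "10M"; once a value reaches ten of a
 * unit it is rounded to a whole number of that unit.
 */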
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}
/**************************************************************/
/* async I/Os */

BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
                                 QEMUIOVector *qiov, int nb_sectors,
                                 BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, false);
}

BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
                                 cb, opaque, true);
}
typedef struct MultiwriteCB {
    int error;
    int num_requests;
    int num_callbacks;
    struct {
        BlockDriverCompletionFunc *cb;
        void *opaque;
        QEMUIOVector *free_qiov;
    } callbacks[];
} MultiwriteCB;
static void multiwrite_user_cb(MultiwriteCB *mcb)
{
    int i;

    for (i = 0; i < mcb->num_callbacks; i++) {
        mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
        if (mcb->callbacks[i].free_qiov) {
            qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
        }
        g_free(mcb->callbacks[i].free_qiov);
    }
}
static void multiwrite_cb(void *opaque, int ret)
{
    MultiwriteCB *mcb = opaque;

    trace_multiwrite_cb(mcb, ret);

    if (ret < 0 && !mcb->error) {
        mcb->error = ret;
    }

    mcb->num_requests--;
    if (mcb->num_requests == 0) {
        multiwrite_user_cb(mcb);
        g_free(mcb);
    }
}
static int multiwrite_req_compare(const void *a, const void *b)
{
    const BlockRequest *req1 = a, *req2 = b;

    /*
     * Note that we can't simply subtract req2->sector from req1->sector
     * here as that could overflow the return value.
     */
    if (req1->sector > req2->sector) {
        return 1;
    } else if (req1->sector < req2->sector) {
        return -1;
    } else {
        return 0;
    }
}
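/*
 * Illustration of the overflow guarded against above: with
 * req1->sector == 0 and req2->sector == INT64_MAX, a naive
 * "return req1->sector - req2->sector;" would truncate the 64-bit
 * difference to the int return type and could even report the wrong sign.
 */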
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
                            int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // Handle exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We shouldn't need to add any zeros between the two requests
            assert(reqs[i].sector <= oldreq_last);

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector     = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov       = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. On error, this function returns -1, and any of the requests
 * may or may not have been submitted yet. In particular, this means that the
 * callback will be called for some of the requests, and for others it won't.
 * The caller must check the error field of the BlockRequest to wait for the
 * right callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergeable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /* Run the aio requests. */
    mcb->num_requests = num_reqs;
    for (i = 0; i < num_reqs; i++) {
        bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
                        reqs[i].nb_sectors, multiwrite_cb, mcb);
    }

    return 0;
}
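/*
 * Example (illustrative sketch, not part of the original code): submitting a
 * two-request batch.  The callback, contexts and qiovs are stand-ins owned by
 * the caller.
 *
 *     BlockRequest reqs[2] = {
 *         { .sector = 0, .nb_sectors = 8, .qiov = qiov1,
 *           .cb = my_write_cb, .opaque = ctx1 },
 *         { .sector = 8, .nb_sectors = 8, .qiov = qiov2,
 *           .cb = my_write_cb, .opaque = ctx2 },
 *     };
 *
 *     if (bdrv_aio_multiwrite(bs, reqs, 2) < 0) {
 *         ... only requests with reqs[i].error == 0 still get a callback ...
 *     }
 *
 * Since the two ranges here are exactly sequential, multiwrite_merge() would
 * combine them into a single 16-sector write before submission.
 */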
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->pool->cancel(acb);
}
/* block I/O throttling */
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
                 bool is_write, double elapsed_time, uint64_t *wait)
{
    uint64_t bps_limit = 0;
    double   bytes_limit, bytes_base, bytes_res;
    double   slice_time, wait_time;

    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.bps[is_write]) {
        bps_limit = bs->io_limits.bps[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    bytes_limit = bps_limit * slice_time;
    bytes_base  = bs->nr_bytes[is_write] - bs->io_base.bytes[is_write];
    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
        bytes_base += bs->nr_bytes[!is_write] - bs->io_base.bytes[!is_write];
    }

    /* bytes_base: the bytes of data which have been read/written; it is
     *             obtained from the history statistics.
     * bytes_res: the remaining bytes of data which need to be read/written.
     * (bytes_base + bytes_res) / bps_limit: used to calculate
     *             the total time for completing reading/writing all data.
     */
    bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;

    if (bytes_base + bytes_res <= bytes_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (bytes_base + bytes_res) / bps_limit - elapsed_time;

    /* When the I/O rate at runtime exceeds the limits,
     * bs->slice_end needs to be extended so that the current statistics
     * are kept until the timer fires; the scaling factor below was tuned
     * experimentally.
     */
    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
                             double elapsed_time, uint64_t *wait)
{
    uint64_t iops_limit = 0;
    double   ios_limit, ios_base;
    double   slice_time, wait_time;

    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        iops_limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
    } else if (bs->io_limits.iops[is_write]) {
        iops_limit = bs->io_limits.iops[is_write];
    } else {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    slice_time = bs->slice_end - bs->slice_start;
    slice_time /= (NANOSECONDS_PER_SECOND);
    ios_limit  = iops_limit * slice_time;
    ios_base   = bs->nr_ops[is_write] - bs->io_base.ios[is_write];
    if (bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
        ios_base += bs->nr_ops[!is_write] - bs->io_base.ios[!is_write];
    }

    if (ios_base + 1 <= ios_limit) {
        if (wait) {
            *wait = 0;
        }

        return false;
    }

    /* Calc approx time to dispatch */
    wait_time = (ios_base + 1) / iops_limit;
    if (wait_time > elapsed_time) {
        wait_time = wait_time - elapsed_time;
    } else {
        wait_time = 0;
    }

    bs->slice_time = wait_time * BLOCK_IO_SLICE_TIME * 10;
    bs->slice_end += bs->slice_time - 3 * BLOCK_IO_SLICE_TIME;
    if (wait) {
        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
    }

    return true;
}
static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                           bool is_write, int64_t *wait)
{
    int64_t  now, max_wait;
    uint64_t bps_wait = 0, iops_wait = 0;
    double   elapsed_time;
    int      bps_ret, iops_ret;

    now = qemu_get_clock_ns(vm_clock);
    if ((bs->slice_start < now)
        && (bs->slice_end > now)) {
        bs->slice_end = now + bs->slice_time;
    } else {
        bs->slice_time  = 5 * BLOCK_IO_SLICE_TIME;
        bs->slice_start = now;
        bs->slice_end   = now + bs->slice_time;

        bs->io_base.bytes[is_write]  = bs->nr_bytes[is_write];
        bs->io_base.bytes[!is_write] = bs->nr_bytes[!is_write];

        bs->io_base.ios[is_write]    = bs->nr_ops[is_write];
        bs->io_base.ios[!is_write]   = bs->nr_ops[!is_write];
    }

    elapsed_time = now - bs->slice_start;
    elapsed_time /= (NANOSECONDS_PER_SECOND);

    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
                                      is_write, elapsed_time, &bps_wait);
    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
                                       elapsed_time, &iops_wait);
    if (bps_ret || iops_ret) {
        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
        if (wait) {
            *wait = max_wait;
        }

        now = qemu_get_clock_ns(vm_clock);
        if (bs->slice_end < now + max_wait) {
            bs->slice_end = now + max_wait;
        }

        return true;
    }

    if (wait) {
        *wait = 0;
    }

    return false;
}
/**************************************************************/
/* async block device emulation */

typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;
    int ret;
    /* vector translation state */
    QEMUIOVector *qiov;
    uint8_t *bounce;
    int is_write;
} BlockDriverAIOCBSync;

static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
{
    BlockDriverAIOCBSync *acb =
        container_of(blockacb, BlockDriverAIOCBSync, common);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}

static AIOPool bdrv_em_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBSync),
    .cancel             = bdrv_aio_cancel_em,
};
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockDriverAIOCBSync *acb = opaque;

    if (!acb->is_write)
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)

{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;
    bool is_write;
    QEMUBH *bh;
} BlockDriverAIOCBCoroutine;

static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}

static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size         = sizeof(BlockDriverAIOCBCoroutine),
    .cancel             = bdrv_aio_co_cancel_em,
};
static void bdrv_co_em_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}
/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    } else {
        acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov, 0);
    }

    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    acb->req.qiov = qiov;
    acb->is_write = is_write;

    co = qemu_coroutine_create(bdrv_co_do_rw);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_flush(bs);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    trace_bdrv_aio_flush(bs, opaque);

    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
    acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
    qemu_bh_schedule(acb->bh);
}

BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    Coroutine *co;
    BlockDriverAIOCBCoroutine *acb;

    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);

    acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
    acb->req.sector = sector_num;
    acb->req.nb_sectors = nb_sectors;
    co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
    qemu_coroutine_enter(co, acb);

    return &acb->common;
}
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}

void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
                   BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCB *acb;

    if (pool->free_aiocb) {
        acb = pool->free_aiocb;
        pool->free_aiocb = acb->next;
    } else {
        acb = g_malloc0(pool->aiocb_size);
        acb->pool = pool;
    }
    acb->bs = bs;
    acb->cb = cb;
    acb->opaque = opaque;
    return acb;
}

void qemu_aio_release(void *p)
{
    BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
    AIOPool *pool = acb->pool;
    acb->next = pool->free_aiocb;
    pool->free_aiocb = acb;
}
/**************************************************************/
/* Coroutine block device emulation */

typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;
    int ret;
} CoroutineIOCompletion;

static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    qemu_coroutine_yield();

    return co.ret;
}
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
}

static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov)
{
    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
}
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_flush(rwco->bs);
}
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
    int ret;

    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
        return 0;
    }

    /* Write back cached data to the OS even with cache=unsafe */
    if (bs->drv->bdrv_co_flush_to_os) {
        ret = bs->drv->bdrv_co_flush_to_os(bs);
        if (ret < 0) {
            return ret;
        }
    }

    /* But don't actually force it to the disk with cache=unsafe */
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv->bdrv_co_flush_to_disk) {
        ret = bs->drv->bdrv_co_flush_to_disk(bs);
    } else if (bs->drv->bdrv_aio_flush) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            ret = -EIO;
        } else {
            qemu_coroutine_yield();
            ret = co.ret;
        }
    } else {
        /*
         * Some block drivers always operate in either writethrough or unsafe
         * mode and therefore don't support bdrv_flush. Usually qemu doesn't
         * know how the server works (because the behaviour is hardcoded or
         * depends on server-side configuration), so we can't ensure that
         * everything is safe on disk. Returning an error doesn't work because
         * that would break guests even if the server operates in writethrough
         * mode.
         *
         * Let's hope the user knows what they're doing.
         */
        ret = 0;
    }
    if (ret < 0) {
        return ret;
    }

    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
     * in the case of cache=unsafe, so there are no useless flushes.
     */
    return bdrv_co_flush(bs->file);
}
void bdrv_invalidate_cache(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_invalidate_cache) {
        bs->drv->bdrv_invalidate_cache(bs);
    }
}

void bdrv_invalidate_cache_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_invalidate_cache(bs);
    }
}
void bdrv_clear_incoming_migration_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
    }
}
int bdrv_flush(BlockDriverState *bs)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_flush_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_flush_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
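/*
 * bdrv_flush() above and bdrv_discard() below share the same synchronous
 * wrapper pattern: in coroutine context the entry function is simply called,
 * and it completes within the current coroutine; otherwise a fresh coroutine
 * is entered and the event loop is pumped until the entry function overwrites
 * the NOT_DONE marker in the shared RwCo context.
 */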
static void coroutine_fn bdrv_discard_co_entry(void *opaque)
{
    RwCo *rwco = opaque;

    rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
}
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
                                 int nb_sectors)
{
    if (!bs->drv) {
        return -ENOMEDIUM;
    } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
        return -EIO;
    } else if (bs->read_only) {
        return -EROFS;
    } else if (bs->drv->bdrv_co_discard) {
        return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
    } else if (bs->drv->bdrv_aio_discard) {
        BlockDriverAIOCB *acb;
        CoroutineIOCompletion co = {
            .coroutine = qemu_coroutine_self(),
        };

        acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
                                        bdrv_co_io_em_complete, &co);
        if (acb == NULL) {
            return -EIO;
        } else {
            qemu_coroutine_yield();
            return co.ret;
        }
    } else {
        return 0;
    }
}
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
{
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .ret = NOT_DONE,
    };

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_discard_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_discard_co_entry);
        qemu_coroutine_enter(co, &rwco);
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }

    return rwco.ret;
}
/**************************************************************/
/* removable device support */

/**
 * Return TRUE if the media is present
 */
int bdrv_is_inserted(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (!drv)
        return 0;
    if (!drv->bdrv_is_inserted)
        return 1;
    return drv->bdrv_is_inserted(bs);
}
/**
 * Return whether the media changed since the last call to this
 * function, or -ENOTSUP if we don't know.  Most drivers don't know.
 */
int bdrv_media_changed(BlockDriverState *bs)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_media_changed) {
        return drv->bdrv_media_changed(bs);
    }
    return -ENOTSUP;
}
/**
 * If eject_flag is TRUE, eject the media. Otherwise, close the tray.
 */
void bdrv_eject(BlockDriverState *bs, bool eject_flag)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_eject) {
        drv->bdrv_eject(bs, eject_flag);
    }

    if (bs->device_name[0] != '\0') {
        bdrv_emit_qmp_eject_event(bs, eject_flag);
    }
}
/**
 * Lock or unlock the media (if it is locked, the user won't be able
 * to eject it manually).
 */
void bdrv_lock_medium(BlockDriverState *bs, bool locked)
{
    BlockDriver *drv = bs->drv;

    trace_bdrv_lock_medium(bs, locked);

    if (drv && drv->bdrv_lock_medium) {
        drv->bdrv_lock_medium(bs, locked);
    }
}
/* needed for generic scsi interface */

int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_ioctl)
        return drv->bdrv_ioctl(bs, req, buf);
    return -ENOTSUP;
}
BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
        unsigned long int req, void *buf,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *drv = bs->drv;

    if (drv && drv->bdrv_aio_ioctl)
        return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
    return NULL;
}
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
    return qemu_memalign((bs && bs->buffer_alignment) ?
                         bs->buffer_alignment : 512, size);
}
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            bs->dirty_bitmap = g_malloc0(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (bs->dirty_bitmap &&
        (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}
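/*
 * Worked example (illustrative): with BDRV_SECTORS_PER_DIRTY_CHUNK == 2048
 * (1 MB chunks) and 64-bit longs, sector 5000000 falls in chunk 2441, which
 * is bit 2441 % 64 == 9 of word 2441 / 64 == 38 in the bitmap.
 */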
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}

int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}

int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus_enabled = true;
    bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
/* The I/O status is only enabled if the drive explicitly
 * enables it _and_ the VM is configured to stop on errors */
bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
{
    return (bs->iostatus_enabled &&
           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
            bs->on_read_error == BLOCK_ERR_STOP_ANY));
}
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus_enabled = false;
}

void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
    }
}
/* XXX: Today this is set by device models because it makes the implementation
   quite simple. However, the block layer knows about the error, so it's
   possible to implement this without device models being involved */
void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
{
    if (bdrv_iostatus_is_enabled(bs) &&
        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        assert(error >= 0);
        bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                         BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}

void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
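/*
 * Example (illustrative sketch, not part of the original code): how a device
 * model brackets a read with the accounting hooks; 'nb_sectors' stands in for
 * the request size.
 *
 *     BlockAcctCookie cookie;
 *
 *     bdrv_acct_start(bs, &cookie, nb_sectors * BDRV_SECTOR_SIZE,
 *                     BDRV_ACCT_READ);
 *     ... issue the read and wait for it to complete ...
 *     bdrv_acct_done(bs, &cookie);
 *
 * bdrv_acct_done() folds the bytes, the operation count and the wall-clock
 * latency into the per-type totals that qmp_query_blockstat() reports.
 */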
int bdrv_img_create(const char *filename, const char *fmt,
                    const char *base_filename, const char *base_fmt,
                    char *options, uint64_t img_size, int flags)
{
    QEMUOptionParameter *param = NULL, *create_options = NULL;
    QEMUOptionParameter *backing_fmt, *backing_file, *size;
    BlockDriverState *bs = NULL;
    BlockDriver *drv, *proto_drv;
    BlockDriver *backing_drv = NULL;
    int ret = 0;

    /* Find driver and parse its options */
    drv = bdrv_find_format(fmt);
    if (!drv) {
        error_report("Unknown file format '%s'", fmt);
        ret = -EINVAL;
        goto out;
    }

    proto_drv = bdrv_find_protocol(filename);
    if (!proto_drv) {
        error_report("Unknown protocol '%s'", filename);
        ret = -EINVAL;
        goto out;
    }

    create_options = append_option_parameters(create_options,
                                              drv->create_options);
    create_options = append_option_parameters(create_options,
                                              proto_drv->create_options);

    /* Create parameter list with default values */
    param = parse_option_parameters("", create_options, param);

    set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);

    /* Parse -o options */
    if (options) {
        param = parse_option_parameters(options, create_options, param);
        if (param == NULL) {
            error_report("Invalid options for file format '%s'.", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_filename) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
                                 base_filename)) {
            error_report("Backing file not supported for file format '%s'",
                         fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    if (base_fmt) {
        if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
            error_report("Backing file format not supported for file "
                         "format '%s'", fmt);
            ret = -EINVAL;
            goto out;
        }
    }

    backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
    if (backing_file && backing_file->value.s) {
        if (!strcmp(filename, backing_file->value.s)) {
            error_report("Error: Trying to create an image with the "
                         "same filename as the backing file");
            ret = -EINVAL;
            goto out;
        }
    }

    backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
    if (backing_fmt && backing_fmt->value.s) {
        backing_drv = bdrv_find_format(backing_fmt->value.s);
        if (!backing_drv) {
            error_report("Unknown backing file format '%s'",
                         backing_fmt->value.s);
            ret = -EINVAL;
            goto out;
        }
    }

    // The size for the image must always be specified, with one exception:
    // If we are using a backing file, we can obtain the size from there
    size = get_option_parameter(param, BLOCK_OPT_SIZE);
    if (size && size->value.n == -1) {
        if (backing_file && backing_file->value.s) {
            uint64_t size;
            char buf[32];

            bs = bdrv_new("");

            ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
            if (ret < 0) {
                error_report("Could not open '%s'", backing_file->value.s);
                goto out;
            }
            bdrv_get_geometry(bs, &size);
            size *= 512;

            snprintf(buf, sizeof(buf), "%" PRId64, size);
            set_option_parameter(param, BLOCK_OPT_SIZE, buf);
        } else {
            error_report("Image creation needs a size parameter");
            ret = -EINVAL;
            goto out;
        }
    }

    printf("Formatting '%s', fmt=%s ", filename, fmt);
    print_option_parameters(param);
    puts("");

    ret = bdrv_create(drv, filename, param);

    if (ret < 0) {
        if (ret == -ENOTSUP) {
            error_report("Formatting or formatting option not supported for "
                         "file format '%s'", fmt);
        } else if (ret == -EFBIG) {
            error_report("The image size is too large for file format '%s'",
                         fmt);
        } else {
            error_report("%s: error while creating %s: %s", filename, fmt,
                         strerror(-ret));
        }
    }

out:
    free_option_parameters(create_options);
    free_option_parameters(param);

    if (bs) {
        bdrv_delete(bs);
    }

    return ret;
}
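/*
 * Example (illustrative sketch, not part of the original code): creating a
 * 1 GiB qcow2 image with default options and no backing file, the way
 * qemu-img create does; the flags argument only matters when a backing file
 * has to be probed for the size.
 *
 *     int ret = bdrv_img_create("test.qcow2", "qcow2",
 *                               NULL, NULL, NULL,
 *                               1024 * 1024 * 1024ULL, 0);
 *     if (ret < 0) {
 *         ... error_report() has already explained the failure ...
 *     }
 */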
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
                       int64_t speed, BlockDriverCompletionFunc *cb,
                       void *opaque, Error **errp)
{
    BlockJob *job;

    if (bs->job || bdrv_in_use(bs)) {
        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }
    bdrv_set_in_use(bs, 1);

    job = g_malloc0(job_type->instance_size);
    job->job_type = job_type;
    job->bs       = bs;
    job->cb       = cb;
    job->opaque   = opaque;
    job->busy     = true;
    bs->job = job;

    /* Only set speed when necessary to avoid NotSupported error */
    if (speed != 0) {
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
        if (error_is_set(&local_err)) {
            bs->job = NULL;
            g_free(job);
            bdrv_set_in_use(bs, 0);
            error_propagate(errp, local_err);
            return NULL;
        }
    }
    return job;
}
void block_job_complete(BlockJob *job, int ret)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    job->cb(job->opaque, ret);
    bs->job = NULL;
    g_free(job);

    bdrv_set_in_use(bs, 0);
}
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    Error *local_err = NULL;

    if (!job->job_type->set_speed) {
        error_set(errp, QERR_NOT_SUPPORTED);
        return;
    }
    job->job_type->set_speed(job, speed, &local_err);
    if (error_is_set(&local_err)) {
        error_propagate(errp, local_err);
        return;
    }

    job->speed = speed;
}
void block_job_cancel(BlockJob *job)
{
    job->cancelled = true;
}

bool block_job_is_cancelled(BlockJob *job)
{
    return job->cancelled;
}
void block_job_cancel_sync(BlockJob *job)
{
    BlockDriverState *bs = job->bs;

    assert(bs->job == job);
    block_job_cancel(job);
    while (bs->job != NULL && bs->job->busy) {
        qemu_aio_wait();
    }
}
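/*
 * Example (illustrative sketch, not part of the original code): the lifecycle
 * a job implementation follows with the helpers above.  'StreamBlockJob' and
 * 'stream_job_type' are stand-in names.
 *
 *     StreamBlockJob *s;
 *
 *     s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp);
 *     if (!s) {
 *         return;
 *     }
 *
 * block_job_create() fails if bs already has a job or is otherwise in use.
 * The job coroutine then does its work, periodically checking
 * block_job_is_cancelled(&s->common), and eventually calls
 * block_job_complete(&s->common, ret), which releases the device again.
 */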