block: Eliminate BlockDriverState member device_name[]
block.c
/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#include "config-host.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
#include "qemu/module.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/block-backend.h"
#include "sysemu/sysemu.h"
#include "qemu/notify.h"
#include "block/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"

#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
#include <sys/disk.h>
#endif
#endif

#ifdef _WIN32
#include <windows.h>
#endif
struct BdrvDirtyBitmap {
    HBitmap *bitmap;
    QLIST_ENTRY(BdrvDirtyBitmap) list;
};

#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */

#define COROUTINE_POOL_RESERVATION 64 /* number of coroutines to reserve */
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque);
static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov);
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
                                          int64_t sector_num, int nb_sectors,
                                          QEMUIOVector *iov);
static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
                                               int64_t sector_num,
                                               QEMUIOVector *qiov,
                                               int nb_sectors,
                                               BdrvRequestFlags flags,
                                               BlockDriverCompletionFunc *cb,
                                               void *opaque,
                                               bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
    QTAILQ_HEAD_INITIALIZER(bdrv_states);

static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
    QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);

static QLIST_HEAD(, BlockDriver) bdrv_drivers =
    QLIST_HEAD_INITIALIZER(bdrv_drivers);

/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
#ifdef _WIN32
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}

int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0')
        return 1;
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL))
        return 1;
    return 0;
}
#endif
/* throttling disk I/O limits */
void bdrv_set_io_limits(BlockDriverState *bs,
                        ThrottleConfig *cfg)
{
    int i;

    throttle_config(&bs->throttle_state, cfg);

    for (i = 0; i < 2; i++) {
        qemu_co_enter_next(&bs->throttled_reqs[i]);
    }
}

/* this function drains all the throttled I/Os */
static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
{
    bool drained = false;
    bool enabled = bs->io_limits_enabled;
    int i;

    bs->io_limits_enabled = false;

    for (i = 0; i < 2; i++) {
        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
            drained = true;
        }
    }

    bs->io_limits_enabled = enabled;

    return drained;
}

void bdrv_io_limits_disable(BlockDriverState *bs)
{
    bs->io_limits_enabled = false;

    bdrv_start_throttled_reqs(bs);

    throttle_destroy(&bs->throttle_state);
}

static void bdrv_throttle_read_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[0]);
}

static void bdrv_throttle_write_timer_cb(void *opaque)
{
    BlockDriverState *bs = opaque;
    qemu_co_enter_next(&bs->throttled_reqs[1]);
}

/* should be called before bdrv_set_io_limits if a limit is set */
void bdrv_io_limits_enable(BlockDriverState *bs)
{
    assert(!bs->io_limits_enabled);
    throttle_init(&bs->throttle_state,
                  bdrv_get_aio_context(bs),
                  QEMU_CLOCK_VIRTUAL,
                  bdrv_throttle_read_timer_cb,
                  bdrv_throttle_write_timer_cb,
                  bs);
    bs->io_limits_enabled = true;
}

/* This function makes an I/O wait if needed
 *
 * @bytes:    the number of bytes of the I/O
 * @is_write: is the I/O a write
 */
static void bdrv_io_limits_intercept(BlockDriverState *bs,
                                     unsigned int bytes,
                                     bool is_write)
{
    /* does this I/O have to wait? */
    bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);

    /* if it must wait, or any request of this type is already throttled,
     * queue this I/O */
    if (must_wait ||
        !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
    }

    /* the I/O will be executed, do the accounting */
    throttle_account(&bs->throttle_state, is_write, bytes);

    /* if the next request must wait -> do nothing */
    if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
        return;
    }

    /* else queue next request for execution */
    qemu_co_queue_next(&bs->throttled_reqs[is_write]);
}
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
    if (!bs || !bs->drv) {
        /* 4k should be on the safe side */
        return 4096;
    }

    return bs->bl.opt_mem_alignment;
}
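/*
 * Illustrative sketch (not part of the original file): allocating an I/O
 * buffer that honours the alignment reported above, e.g. for O_DIRECT.
 * "example_alloc_io_buffer" is a hypothetical name; qemu_memalign() is the
 * existing QEMU allocation helper.
 */
#if 0
static void *example_alloc_io_buffer(BlockDriverState *bs, size_t size)
{
    /* alignment comes from the driver's block limits, 4096 as a fallback */
    return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
#endif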
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}

int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}
/* If filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it relative to base_path. URLs are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}

void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
{
    if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
        pstrcpy(dest, sz, bs->backing_file);
    } else {
        path_combine(dest, sz, bs->filename, bs->backing_file);
    }
}
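/*
 * Illustrative sketch (not part of the original file): what path_combine()
 * produces for relative and absolute backing file names. The function name
 * "example_path_combine" is hypothetical.
 */
#if 0
static void example_path_combine(void)
{
    char dest[PATH_MAX];

    /* relative name: resolved against the directory of the base image */
    path_combine(dest, sizeof(dest), "/images/base.qcow2", "backing.qcow2");
    /* dest is now "/images/backing.qcow2" */

    /* absolute name: copied through unchanged */
    path_combine(dest, sizeof(dest), "/images/base.qcow2", "/tmp/b.qcow2");
    /* dest is now "/tmp/b.qcow2" */
}
#endif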
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/bdrv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new_root(const char *device_name, Error **errp)
{
    BlockDriverState *bs;

    assert(*device_name);

    if (*device_name && !id_wellformed(device_name)) {
        error_setg(errp, "Invalid device name");
        return NULL;
    }

    if (bdrv_find(device_name)) {
        error_setg(errp, "Device with id '%s' already exists",
                   device_name);
        return NULL;
    }

    if (bdrv_find_node(device_name)) {
        error_setg(errp,
                   "Device name '%s' conflicts with an existing node name",
                   device_name);
        return NULL;
    }

    bs = bdrv_new();

    QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
    return bs;
}

BlockDriverState *bdrv_new(void)
{
    BlockDriverState *bs;
    int i;

    bs = g_new0(BlockDriverState, 1);
    QLIST_INIT(&bs->dirty_bitmaps);
    for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
        QLIST_INIT(&bs->op_blockers[i]);
    }
    bdrv_iostatus_disable(bs);
    notifier_list_init(&bs->close_notifiers);
    notifier_with_return_list_init(&bs->before_write_notifiers);
    qemu_co_queue_init(&bs->throttled_reqs[0]);
    qemu_co_queue_init(&bs->throttled_reqs[1]);
    bs->refcnt = 1;
    bs->aio_context = qemu_get_aio_context();

    return bs;
}
void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
{
    notifier_list_add(&bs->close_notifiers, notify);
}

BlockDriver *bdrv_find_format(const char *format_name)
{
    BlockDriver *drv1;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (!strcmp(drv1->format_name, format_name)) {
            return drv1;
        }
    }
    return NULL;
}
static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
{
    static const char *whitelist_rw[] = {
        CONFIG_BDRV_RW_WHITELIST
    };
    static const char *whitelist_ro[] = {
        CONFIG_BDRV_RO_WHITELIST
    };
    const char **p;

    if (!whitelist_rw[0] && !whitelist_ro[0]) {
        return 1;               /* no whitelist, anything goes */
    }

    for (p = whitelist_rw; *p; p++) {
        if (!strcmp(drv->format_name, *p)) {
            return 1;
        }
    }
    if (read_only) {
        for (p = whitelist_ro; *p; p++) {
            if (!strcmp(drv->format_name, *p)) {
                return 1;
            }
        }
    }
    return 0;
}

BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
                                          bool read_only)
{
    BlockDriver *drv = bdrv_find_format(format_name);
    return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
}
typedef struct CreateCo {
    BlockDriver *drv;
    char *filename;
    QemuOpts *opts;
    int ret;
    Error *err;
} CreateCo;

static void coroutine_fn bdrv_create_co_entry(void *opaque)
{
    Error *local_err = NULL;
    int ret;

    CreateCo *cco = opaque;
    assert(cco->drv);

    ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
    if (local_err) {
        error_propagate(&cco->err, local_err);
    }
    cco->ret = ret;
}

int bdrv_create(BlockDriver *drv, const char *filename,
                QemuOpts *opts, Error **errp)
{
    int ret;

    Coroutine *co;
    CreateCo cco = {
        .drv = drv,
        .filename = g_strdup(filename),
        .opts = opts,
        .ret = NOT_DONE,
        .err = NULL,
    };

    if (!drv->bdrv_create) {
        error_setg(errp, "Driver '%s' does not support image creation",
                   drv->format_name);
        ret = -ENOTSUP;
        goto out;
    }

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_create_co_entry(&cco);
    } else {
        co = qemu_coroutine_create(bdrv_create_co_entry);
        qemu_coroutine_enter(co, &cco);
        while (cco.ret == NOT_DONE) {
            aio_poll(qemu_get_aio_context(), true);
        }
    }

    ret = cco.ret;
    if (ret < 0) {
        if (cco.err) {
            error_propagate(errp, cco.err);
        } else {
            error_setg_errno(errp, -ret, "Could not create image");
        }
    }

out:
    g_free(cco.filename);
    return ret;
}

int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
{
    BlockDriver *drv;
    Error *local_err = NULL;
    int ret;

    drv = bdrv_find_protocol(filename, true);
    if (drv == NULL) {
        error_setg(errp, "Could not find protocol for file '%s'", filename);
        return -ENOENT;
    }

    ret = bdrv_create(drv, filename, opts, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
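/*
 * Illustrative sketch (not part of the original file): creating an image
 * through a protocol driver, roughly what bdrv_create_file() does plus the
 * QemuOpts setup a caller needs. "example_create_raw_image" is a
 * hypothetical name; it assumes a "file"-style protocol driver is linked in.
 */
#if 0
static int example_create_raw_image(const char *filename, int64_t size,
                                    Error **errp)
{
    BlockDriver *drv = bdrv_find_protocol(filename, true);
    QemuOpts *opts;
    int ret;

    if (!drv) {
        error_setg(errp, "Unknown protocol for '%s'", filename);
        return -ENOENT;
    }
    opts = qemu_opts_create(drv->create_opts, NULL, 0, &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
    ret = bdrv_create(drv, filename, opts, errp);
    qemu_opts_del(opts);
    return ret;
}
#endif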
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{
    BlockDriver *drv = bs->drv;
    Error *local_err = NULL;

    memset(&bs->bl, 0, sizeof(bs->bl));

    if (!drv) {
        return;
    }

    /* Take some limits from the children as a default */
    if (bs->file) {
        bdrv_refresh_limits(bs->file, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
        bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
    } else {
        bs->bl.opt_mem_alignment = 512;
    }

    if (bs->backing_hd) {
        bdrv_refresh_limits(bs->backing_hd, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
        bs->bl.opt_transfer_length =
            MAX(bs->bl.opt_transfer_length,
                bs->backing_hd->bl.opt_transfer_length);
        bs->bl.opt_mem_alignment =
            MAX(bs->bl.opt_mem_alignment,
                bs->backing_hd->bl.opt_mem_alignment);
    }

    /* Then let the driver override it */
    if (drv->bdrv_refresh_limits) {
        drv->bdrv_refresh_limits(bs, errp);
    }
}
/**
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        unlink(filename);
        return -errno;
    }
    return 0;
#endif
}
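/*
 * Illustrative sketch (not part of the original file): typical use of
 * get_tmp_filename(), similar to the snapshot overlay creation further down.
 * "example_make_tmp_file" is a hypothetical name.
 */
#if 0
static int example_make_tmp_file(char **out_name, Error **errp)
{
    char *name = g_malloc0(PATH_MAX);
    int ret = get_tmp_filename(name, PATH_MAX);

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not get temporary filename");
        g_free(name);
        return ret;
    }
    *out_name = name;           /* caller unlinks and frees when done */
    return 0;
}
#endif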
/*
 * Detect host devices. By convention, /dev/cdrom[N] is always
 * recognized as a host CDROM.
 */
static BlockDriver *find_hdev_driver(const char *filename)
{
    int score_max = 0, score;
    BlockDriver *drv = NULL, *d;

    QLIST_FOREACH(d, &bdrv_drivers, list) {
        if (d->bdrv_probe_device) {
            score = d->bdrv_probe_device(filename);
            if (score > score_max) {
                score_max = score;
                drv = d;
            }
        }
    }

    return drv;
}
BlockDriver *bdrv_find_protocol(const char *filename,
                                bool allow_protocol_prefix)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename) || !allow_protocol_prefix) {
        return bdrv_find_format("file");
    }

    p = strchr(filename, ':');
    assert(p != NULL);
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}
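/*
 * Illustrative sketch (not part of the original file): how protocol lookup
 * behaves for a few file names. "example_find_protocol" is a hypothetical
 * name; which drivers exist depends on the build configuration.
 */
#if 0
static void example_find_protocol(void)
{
    /* "nbd:..." -> the driver whose protocol_name is "nbd", if linked in */
    BlockDriver *nbd = bdrv_find_protocol("nbd:localhost:10809", true);

    /* a plain path (or prefixes disallowed) -> the "file" driver */
    BlockDriver *file = bdrv_find_protocol("/tmp/test.qcow2", true);

    (void)nbd;
    (void)file;
}
#endif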
static int find_image_format(BlockDriverState *bs, const char *filename,
                             BlockDriver **pdrv, Error **errp)
{
    int score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    int ret = 0;

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
        drv = bdrv_find_format("raw");
        if (!drv) {
            error_setg(errp, "Could not find raw image format");
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not read image for determining its "
                         "format");
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        error_setg(errp, "Could not determine image format: No compatible "
                   "driver found");
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
/**
 * Set the current 'total_sectors' value
 * Return 0 on success, -errno on error.
 */
static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
{
    BlockDriver *drv = bs->drv;

    /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
    if (bs->sg)
        return 0;

    /* query actual device if possible, otherwise just trust the hint */
    if (drv->bdrv_getlength) {
        int64_t length = drv->bdrv_getlength(bs);
        if (length < 0) {
            return length;
        }
        hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
    }

    bs->total_sectors = hint;
    return 0;
}
/**
 * Set open flags for a given discard mode
 *
 * Return 0 on success, -1 if the discard mode was invalid.
 */
int bdrv_parse_discard_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_UNMAP;

    if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
        /* do nothing */
    } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
        *flags |= BDRV_O_UNMAP;
    } else {
        return -1;
    }

    return 0;
}

/**
 * Set open flags for a given cache mode
 *
 * Return 0 on success, -1 if the cache mode was invalid.
 */
int bdrv_parse_cache_flags(const char *mode, int *flags)
{
    *flags &= ~BDRV_O_CACHE_MASK;

    if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "directsync")) {
        *flags |= BDRV_O_NOCACHE;
    } else if (!strcmp(mode, "writeback")) {
        *flags |= BDRV_O_CACHE_WB;
    } else if (!strcmp(mode, "unsafe")) {
        *flags |= BDRV_O_CACHE_WB;
        *flags |= BDRV_O_NO_FLUSH;
    } else if (!strcmp(mode, "writethrough")) {
        /* this is the default */
    } else {
        return -1;
    }

    return 0;
}
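/*
 * Illustrative sketch (not part of the original file): how cache mode
 * strings map onto open flags via bdrv_parse_cache_flags().
 * "example_cache_modes" is a hypothetical name.
 */
#if 0
static void example_cache_modes(void)
{
    int flags = 0;

    bdrv_parse_cache_flags("none", &flags);
    /* flags now contain BDRV_O_NOCACHE | BDRV_O_CACHE_WB: bypass the host
     * page cache, but complete writes before they reach stable storage */

    flags = 0;
    bdrv_parse_cache_flags("writethrough", &flags);
    /* flags unchanged: every write is flushed through to the image */

    assert(bdrv_parse_cache_flags("bogus", &flags) == -1);
}
#endif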
/**
 * The copy-on-read flag is actually a reference count so multiple users may
 * use the feature without worrying about clobbering its previous state.
 * Copy-on-read stays enabled until all users have called to disable it.
 */
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
    bs->copy_on_read++;
}

void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
    assert(bs->copy_on_read > 0);
    bs->copy_on_read--;
}
/*
 * Returns the flags that a temporary snapshot should get, based on the
 * originally requested flags (the originally requested image will have flags
 * like a backing file)
 */
static int bdrv_temp_snapshot_flags(int flags)
{
    return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
}

/*
 * Returns the flags that bs->file should get, based on the given flags for
 * the parent BDS
 */
static int bdrv_inherited_flags(int flags)
{
    /* Enable protocol handling, disable format probing for bs->file */
    flags |= BDRV_O_PROTOCOL;

    /* Our block drivers take care to send flushes and respect unmap policy,
     * so we can enable both unconditionally on lower layers. */
    flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;

    /* Clear flags that only apply to the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);

    return flags;
}

/*
 * Returns the flags that bs->backing_hd should get, based on the given flags
 * for the parent BDS
 */
static int bdrv_backing_flags(int flags)
{
    /* backing files are always opened read-only */
    flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);

    /* snapshot=on is handled on the top layer */
    flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);

    return flags;
}

static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
    int open_flags = flags | BDRV_O_CACHE_WB;

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);

    /*
     * Snapshots should be writable.
     */
    if (flags & BDRV_O_TEMPORARY) {
        open_flags |= BDRV_O_RDWR;
    }

    return open_flags;
}
static void bdrv_assign_node_name(BlockDriverState *bs,
                                  const char *node_name,
                                  Error **errp)
{
    if (!node_name) {
        return;
    }

    /* Check for empty string or invalid characters */
    if (!id_wellformed(node_name)) {
        error_setg(errp, "Invalid node name");
        return;
    }

    /* takes care of avoiding namespace collisions */
    if (bdrv_find(node_name)) {
        error_setg(errp, "node-name=%s is conflicting with a device id",
                   node_name);
        return;
    }

    /* takes care of avoiding duplicate node names */
    if (bdrv_find_node(node_name)) {
        error_setg(errp, "Duplicate node name");
        return;
    }

    /* copy node name into the bs and insert it into the graph list */
    pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
    QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
}
/*
 * Common part for opening disk images and files
 *
 * Removes all processed options from *options.
 */
static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
    QDict *options, int flags, BlockDriver *drv, Error **errp)
{
    int ret, open_flags;
    const char *filename;
    const char *node_name = NULL;
    Error *local_err = NULL;

    assert(drv != NULL);
    assert(bs->file == NULL);
    assert(options != NULL && bs->options != options);

    if (file != NULL) {
        filename = file->filename;
    } else {
        filename = qdict_get_try_str(options, "filename");
    }

    if (drv->bdrv_needs_filename && !filename) {
        error_setg(errp, "The '%s' block driver requires a file name",
                   drv->format_name);
        return -EINVAL;
    }

    trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);

    node_name = qdict_get_try_str(options, "node-name");
    bdrv_assign_node_name(bs, node_name, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }
    qdict_del(options, "node-name");

    /* bdrv_open() was called with a protocol driver directly as drv. That
     * layer has already been opened as 'file', so swap it into bs (while
     * file becomes a closed BlockDriverState) and return immediately. */
    if (file != NULL && drv->bdrv_file_open) {
        bdrv_swap(file, bs);
        return 0;
    }

    bs->open_flags = flags;
    bs->guest_block_size = 512;
    bs->request_alignment = 512;
    bs->zero_beyond_eof = true;
    open_flags = bdrv_open_flags(bs, flags);
    bs->read_only = !(open_flags & BDRV_O_RDWR);
    bs->growable = !!(flags & BDRV_O_PROTOCOL);

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
        error_setg(errp,
                   !bs->read_only && bdrv_is_whitelisted(drv, true)
                        ? "Driver '%s' can only be used for read-only devices"
                        : "Driver '%s' is not whitelisted",
                   drv->format_name);
        return -ENOTSUP;
    }

    assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
    if (flags & BDRV_O_COPY_ON_READ) {
        if (!bs->read_only) {
            bdrv_enable_copy_on_read(bs);
        } else {
            error_setg(errp, "Can't use copy-on-read on read-only device");
            return -EINVAL;
        }
    }

    if (filename != NULL) {
        pstrcpy(bs->filename, sizeof(bs->filename), filename);
    } else {
        bs->filename[0] = '\0';
    }
    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        assert(file == NULL);
        assert(!drv->bdrv_needs_filename || filename != NULL);
        ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
    } else {
        if (file == NULL) {
            error_setg(errp, "Can't use '%s' as a block driver for the "
                       "protocol level", drv->format_name);
            ret = -EINVAL;
            goto free_and_fail;
        }
        bs->file = file;
        ret = drv->bdrv_open(bs, options, open_flags, &local_err);
    }

    if (ret < 0) {
        if (local_err) {
            error_propagate(errp, local_err);
        } else if (bs->filename[0]) {
            error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
        } else {
            error_setg_errno(errp, -ret, "Could not open image");
        }
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not refresh total sector count");
        goto free_and_fail;
    }

    bdrv_refresh_limits(bs, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        ret = -EINVAL;
        goto free_and_fail;
    }

    assert(bdrv_opt_mem_align(bs) != 0);
    assert((bs->request_alignment != 0) || bs->sg);
    return 0;

free_and_fail:
    bs->file = NULL;
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
static QDict *parse_json_filename(const char *filename, Error **errp)
{
    QObject *options_obj;
    QDict *options;
    int ret;

    ret = strstart(filename, "json:", &filename);
    assert(ret);

    options_obj = qobject_from_json(filename);
    if (!options_obj) {
        error_setg(errp, "Could not parse the JSON options");
        return NULL;
    }

    if (qobject_type(options_obj) != QTYPE_QDICT) {
        qobject_decref(options_obj);
        error_setg(errp, "Invalid JSON object given");
        return NULL;
    }

    options = qobject_to_qdict(options_obj);
    qdict_flatten(options);

    return options;
}
/*
 * Fills in default options for opening images and converts the legacy
 * filename/flags pair to option QDict entries.
 */
static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
                             BlockDriver *drv, Error **errp)
{
    const char *filename = *pfilename;
    const char *drvname;
    bool protocol = flags & BDRV_O_PROTOCOL;
    bool parse_filename = false;
    Error *local_err = NULL;

    /* Parse json: pseudo-protocol */
    if (filename && g_str_has_prefix(filename, "json:")) {
        QDict *json_options = parse_json_filename(filename, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        /* Options given in the filename have lower priority than options
         * specified directly */
        qdict_join(*options, json_options, false);
        QDECREF(json_options);
        *pfilename = filename = NULL;
    }

    /* Fetch the file name from the options QDict if necessary */
    if (protocol && filename) {
        if (!qdict_haskey(*options, "filename")) {
            qdict_put(*options, "filename", qstring_from_str(filename));
            parse_filename = true;
        } else {
            error_setg(errp, "Can't specify 'file' and 'filename' options at "
                       "the same time");
            return -EINVAL;
        }
    }

    /* Find the right block driver */
    filename = qdict_get_try_str(*options, "filename");
    drvname = qdict_get_try_str(*options, "driver");

    if (drv) {
        if (drvname) {
            error_setg(errp, "Driver specified twice");
            return -EINVAL;
        }
        drvname = drv->format_name;
        qdict_put(*options, "driver", qstring_from_str(drvname));
    } else {
        if (!drvname && protocol) {
            if (filename) {
                drv = bdrv_find_protocol(filename, parse_filename);
                if (!drv) {
                    error_setg(errp, "Unknown protocol");
                    return -EINVAL;
                }

                drvname = drv->format_name;
                qdict_put(*options, "driver", qstring_from_str(drvname));
            } else {
                error_setg(errp, "Must specify either driver or file");
                return -EINVAL;
            }
        } else if (drvname) {
            drv = bdrv_find_format(drvname);
            if (!drv) {
                error_setg(errp, "Unknown driver '%s'", drvname);
                return -ENOENT;
            }
        }
    }

    assert(drv || !protocol);

    /* Driver-specific filename parsing */
    if (drv && drv->bdrv_parse_filename && parse_filename) {
        drv->bdrv_parse_filename(filename, *options, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return -EINVAL;
        }

        if (!drv->bdrv_needs_filename) {
            qdict_del(*options, "filename");
        }
    }

    return 0;
}
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
{

    if (bs->backing_hd) {
        assert(bs->backing_blocker);
        bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
    } else if (backing_hd) {
        error_setg(&bs->backing_blocker,
                   "device is used as backing hd of '%s'",
                   bdrv_get_device_name(bs));
    }

    bs->backing_hd = backing_hd;
    if (!backing_hd) {
        error_free(bs->backing_blocker);
        bs->backing_blocker = NULL;
        goto out;
    }
    bs->open_flags &= ~BDRV_O_NO_BACKING;
    pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
    pstrcpy(bs->backing_format, sizeof(bs->backing_format),
            backing_hd->drv ? backing_hd->drv->format_name : "");

    bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
    /* Otherwise we won't be able to commit due to check in bdrv_commit */
    bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
                    bs->backing_blocker);
out:
    bdrv_refresh_limits(bs, NULL);
}
/*
 * Opens the backing file for a BlockDriverState if not yet open
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict is transferred to this
 * function (even on failure), so if the caller intends to reuse the dictionary,
 * it needs to use QINCREF() before calling bdrv_open_backing_file.
 */
int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
{
    char *backing_filename = g_malloc0(PATH_MAX);
    int ret = 0;
    BlockDriver *back_drv = NULL;
    BlockDriverState *backing_hd;
    Error *local_err = NULL;

    if (bs->backing_hd != NULL) {
        QDECREF(options);
        goto free_exit;
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    bs->open_flags &= ~BDRV_O_NO_BACKING;
    if (qdict_haskey(options, "file.filename")) {
        backing_filename[0] = '\0';
    } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
        QDECREF(options);
        goto free_exit;
    } else {
        bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
    }

    if (!bs->drv || !bs->drv->supports_backing) {
        ret = -EINVAL;
        error_setg(errp, "Driver doesn't support backing files");
        QDECREF(options);
        goto free_exit;
    }

    backing_hd = bdrv_new();

    if (bs->backing_format[0] != '\0') {
        back_drv = bdrv_find_format(bs->backing_format);
    }

    assert(bs->backing_hd == NULL);
    ret = bdrv_open(&backing_hd,
                    *backing_filename ? backing_filename : NULL, NULL, options,
                    bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
    if (ret < 0) {
        bdrv_unref(backing_hd);
        backing_hd = NULL;
        bs->open_flags |= BDRV_O_NO_BACKING;
        error_setg(errp, "Could not open backing file: %s",
                   error_get_pretty(local_err));
        error_free(local_err);
        goto free_exit;
    }
    bdrv_set_backing_hd(bs, backing_hd);

free_exit:
    g_free(backing_filename);
    return ret;
}
/*
 * Opens a disk image whose options are given as BlockdevRef in another block
 * device's options.
 *
 * If allow_none is true, no image will be opened if filename is NULL and no
 * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
 *
 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
 * itself, all options starting with "${bdref_key}." are considered part of the
 * BlockdevRef.
 *
 * The BlockdevRef will be removed from the options QDict.
 *
 * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
 */
int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                    QDict *options, const char *bdref_key, int flags,
                    bool allow_none, Error **errp)
{
    QDict *image_options;
    int ret;
    char *bdref_key_dot;
    const char *reference;

    assert(pbs);
    assert(*pbs == NULL);

    bdref_key_dot = g_strdup_printf("%s.", bdref_key);
    qdict_extract_subqdict(options, &image_options, bdref_key_dot);
    g_free(bdref_key_dot);

    reference = qdict_get_try_str(options, bdref_key);
    if (!filename && !reference && !qdict_size(image_options)) {
        if (allow_none) {
            ret = 0;
        } else {
            error_setg(errp, "A block device must be specified for \"%s\"",
                       bdref_key);
            ret = -EINVAL;
        }
        QDECREF(image_options);
        goto done;
    }

    ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);

done:
    qdict_del(options, bdref_key);
    return ret;
}
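/*
 * Illustrative sketch (not part of the original file): opening a child image
 * from a flattened options QDict, roughly as done for the "file" child during
 * bdrv_open(). "example_open_file_child" is a hypothetical name.
 */
#if 0
static int example_open_file_child(QDict *options, int flags, Error **errp)
{
    BlockDriverState *file = NULL;
    int ret;

    /* consumes all "file.*" options and the "file" reference, if any */
    ret = bdrv_open_image(&file, NULL, options, "file",
                          bdrv_inherited_flags(flags), false, errp);
    if (ret < 0) {
        return ret;
    }
    /* ... use file, then drop the reference with bdrv_unref(file) ... */
    return 0;
}
#endif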
int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
{
    /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
    char *tmp_filename = g_malloc0(PATH_MAX + 1);
    int64_t total_size;
    BlockDriver *bdrv_qcow2;
    QemuOpts *opts = NULL;
    QDict *snapshot_options;
    BlockDriverState *bs_snapshot;
    Error *local_err = NULL;
    int ret;

    /* if snapshot, we create a temporary backing file and open it
       instead of opening 'filename' directly */

    /* Get the required size from the image */
    total_size = bdrv_getlength(bs);
    if (total_size < 0) {
        ret = total_size;
        error_setg_errno(errp, -total_size, "Could not get image size");
        goto out;
    }

    /* Create the temporary image */
    ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not get temporary filename");
        goto out;
    }

    bdrv_qcow2 = bdrv_find_format("qcow2");
    opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
                            &error_abort);
    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
    ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
    qemu_opts_del(opts);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not create temporary overlay "
                         "'%s': %s", tmp_filename,
                         error_get_pretty(local_err));
        error_free(local_err);
        goto out;
    }

    /* Prepare a new options QDict for the temporary file */
    snapshot_options = qdict_new();
    qdict_put(snapshot_options, "file.driver",
              qstring_from_str("file"));
    qdict_put(snapshot_options, "file.filename",
              qstring_from_str(tmp_filename));

    bs_snapshot = bdrv_new();

    ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
                    flags, bdrv_qcow2, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto out;
    }

    bdrv_append(bs_snapshot, bs);

out:
    g_free(tmp_filename);
    return ret;
}
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * options is a QDict of options to pass to the block drivers, or NULL for an
 * empty set of options. The reference to the QDict belongs to the block layer
 * after the call (even on failure), so if the caller intends to reuse the
 * dictionary, it needs to use QINCREF() before calling bdrv_open.
 *
 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
 * If it is not NULL, the referenced BDS will be reused.
 *
 * The reference parameter may be used to specify an existing block device which
 * should be opened. If specified, neither options nor a filename may be given,
 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
 */
int bdrv_open(BlockDriverState **pbs, const char *filename,
              const char *reference, QDict *options, int flags,
              BlockDriver *drv, Error **errp)
{
    int ret;
    BlockDriverState *file = NULL, *bs;
    const char *drvname;
    Error *local_err = NULL;
    int snapshot_flags = 0;

    assert(pbs);

    if (reference) {
        bool options_non_empty = options ? qdict_size(options) : false;
        QDECREF(options);

        if (*pbs) {
            error_setg(errp, "Cannot reuse an existing BDS when referencing "
                       "another block device");
            return -EINVAL;
        }

        if (filename || options_non_empty) {
            error_setg(errp, "Cannot reference an existing block device with "
                       "additional options or a new filename");
            return -EINVAL;
        }

        bs = bdrv_lookup_bs(reference, reference, errp);
        if (!bs) {
            return -ENODEV;
        }
        bdrv_ref(bs);
        *pbs = bs;
        return 0;
    }

    if (*pbs) {
        bs = *pbs;
    } else {
        bs = bdrv_new();
    }

    /* NULL means an empty set of options */
    if (options == NULL) {
        options = qdict_new();
    }

    ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
    if (local_err) {
        goto fail;
    }

    /* Find the right image format driver */
    drv = NULL;
    drvname = qdict_get_try_str(options, "driver");
    if (drvname) {
        drv = bdrv_find_format(drvname);
        qdict_del(options, "driver");
        if (!drv) {
            error_setg(errp, "Unknown driver: '%s'", drvname);
            ret = -EINVAL;
            goto fail;
        }
    }

    assert(drvname || !(flags & BDRV_O_PROTOCOL));
    if (drv && !drv->bdrv_file_open) {
        /* If the user explicitly wants a format driver here, we'll need to add
         * another layer for the protocol in bs->file */
        flags &= ~BDRV_O_PROTOCOL;
    }

    bs->options = options;
    options = qdict_clone_shallow(options);

    /* Open image file without format layer */
    if ((flags & BDRV_O_PROTOCOL) == 0) {
        if (flags & BDRV_O_RDWR) {
            flags |= BDRV_O_ALLOW_RDWR;
        }
        if (flags & BDRV_O_SNAPSHOT) {
            snapshot_flags = bdrv_temp_snapshot_flags(flags);
            flags = bdrv_backing_flags(flags);
        }

        assert(file == NULL);
        ret = bdrv_open_image(&file, filename, options, "file",
                              bdrv_inherited_flags(flags),
                              true, &local_err);
        if (ret < 0) {
            goto fail;
        }
    }

    /* Image format probing */
    if (!drv && file) {
        ret = find_image_format(file, filename, &drv, &local_err);
        if (ret < 0) {
            goto fail;
        }
    } else if (!drv) {
        error_setg(errp, "Must specify either driver or file");
        ret = -EINVAL;
        goto fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
    if (ret < 0) {
        goto fail;
    }

    if (file && (bs->file != file)) {
        bdrv_unref(file);
        file = NULL;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0) {
        QDict *backing_options;

        qdict_extract_subqdict(options, &backing_options, "backing.");
        ret = bdrv_open_backing_file(bs, backing_options, &local_err);
        if (ret < 0) {
            goto close_and_fail;
        }
    }

    bdrv_refresh_filename(bs);

    /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
     * temporary snapshot afterwards. */
    if (snapshot_flags) {
        ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
        if (local_err) {
            goto close_and_fail;
        }
    }

    /* Check if any unknown options were used */
    if (options && (qdict_size(options) != 0)) {
        const QDictEntry *entry = qdict_first(options);
        if (flags & BDRV_O_PROTOCOL) {
            error_setg(errp, "Block protocol '%s' doesn't support the option "
                       "'%s'", drv->format_name, entry->key);
        } else {
            error_setg(errp, "Block format '%s' used by device '%s' doesn't "
                       "support the option '%s'", drv->format_name,
                       bdrv_get_device_name(bs), entry->key);
        }

        ret = -EINVAL;
        goto close_and_fail;
    }

    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    } else if (!runstate_check(RUN_STATE_PRELAUNCH)
               && !runstate_check(RUN_STATE_INMIGRATE)
               && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
        error_setg(errp,
                   "Guest must be stopped for opening of encrypted image");
        ret = -EBUSY;
        goto close_and_fail;
    }

    QDECREF(options);
    *pbs = bs;
    return 0;

fail:
    if (file != NULL) {
        bdrv_unref(file);
    }
    QDECREF(bs->options);
    QDECREF(options);
    bs->options = NULL;
    if (!*pbs) {
        /* If *pbs is NULL, a new BDS has been created in this function and
           needs to be freed now. Otherwise, it does not need to be closed,
           since it has not really been opened yet. */
        bdrv_unref(bs);
    }
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;

close_and_fail:
    /* See fail path, but now the BDS has to be always closed */
    if (*pbs) {
        bdrv_close(bs);
    } else {
        bdrv_unref(bs);
    }
    QDECREF(options);
    if (local_err) {
        error_propagate(errp, local_err);
    }
    return ret;
}
typedef struct BlockReopenQueueEntry {
    bool prepared;
    BDRVReopenState state;
    QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;

/*
 * Adds a BlockDriverState to a simple queue for an atomic, transactional
 * reopen of multiple devices.
 *
 * bs_queue can either be an existing BlockReopenQueue on which QSIMPLEQ_INIT
 * has already been performed, or alternatively may be NULL, in which case a
 * new BlockReopenQueue will be created and initialized. This newly created
 * BlockReopenQueue should be passed back in for subsequent calls that are
 * intended to be of the same atomic 'set'.
 *
 * bs is the BlockDriverState to add to the reopen queue.
 *
 * flags contains the open flags for the associated bs
 *
 * returns a pointer to bs_queue, which is either the newly allocated
 * bs_queue, or the existing bs_queue being used.
 */
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
                                    BlockDriverState *bs, int flags)
{
    assert(bs != NULL);

    BlockReopenQueueEntry *bs_entry;
    if (bs_queue == NULL) {
        bs_queue = g_new0(BlockReopenQueue, 1);
        QSIMPLEQ_INIT(bs_queue);
    }

    /* bdrv_open() masks this flag out */
    flags &= ~BDRV_O_PROTOCOL;

    if (bs->file) {
        bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
    }

    bs_entry = g_new0(BlockReopenQueueEntry, 1);
    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);

    bs_entry->state.bs = bs;
    bs_entry->state.flags = flags;

    return bs_queue;
}
/*
 * Reopen multiple BlockDriverStates atomically & transactionally.
 *
 * The queue passed in (bs_queue) must have been built up previously
 * via bdrv_reopen_queue().
 *
 * Reopens all BDS specified in the queue, with the appropriate
 * flags. All devices are prepared for reopen, and failure of any
 * device will cause all device changes to be abandoned, and intermediate
 * data cleaned up.
 *
 * If all devices prepare successfully, then the changes are committed
 * to all devices.
 */
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
{
    int ret = -1;
    BlockReopenQueueEntry *bs_entry, *next;
    Error *local_err = NULL;

    assert(bs_queue != NULL);

    bdrv_drain_all();

    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
            error_propagate(errp, local_err);
            goto cleanup;
        }
        bs_entry->prepared = true;
    }

    /* If we reach this point, we have success and just need to apply the
     * changes
     */
    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
        bdrv_reopen_commit(&bs_entry->state);
    }

    ret = 0;

cleanup:
    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
        if (ret && bs_entry->prepared) {
            bdrv_reopen_abort(&bs_entry->state);
        }
        g_free(bs_entry);
    }
    g_free(bs_queue);
    return ret;
}
/* Reopen a single BlockDriverState with the specified flags. */
int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);

    ret = bdrv_reopen_multiple(queue, &local_err);
    if (local_err != NULL) {
        error_propagate(errp, local_err);
    }
    return ret;
}
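/*
 * Illustrative sketch (not part of the original file): reopening two devices
 * as one atomic set, the multi-device pattern that bdrv_reopen() wraps for
 * the single-device case. "example_reopen_pair_read_only" is a hypothetical
 * name.
 */
#if 0
static int example_reopen_pair_read_only(BlockDriverState *a,
                                         BlockDriverState *b, Error **errp)
{
    BlockReopenQueue *queue;

    queue = bdrv_reopen_queue(NULL, a, a->open_flags & ~BDRV_O_RDWR);
    queue = bdrv_reopen_queue(queue, b, b->open_flags & ~BDRV_O_RDWR);

    /* either both devices are reopened read-only, or neither is */
    return bdrv_reopen_multiple(queue, errp);
}
#endif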
/*
 * Prepares a BlockDriverState for reopen. All changes are staged in the
 * 'opaque' field of the BDRVReopenState, which is used and allocated by
 * the block driver layer .bdrv_reopen_prepare()
 *
 * bs is the BlockDriverState to reopen
 * flags are the new open flags
 * queue is the reopen queue
 *
 * Returns 0 on success, non-zero on error.  On error errp will be set
 * as well.
 *
 * On failure, bdrv_reopen_abort() will be called to clean up any data.
 * It is the responsibility of the caller to then call the abort() or
 * commit() for any other BDS that have been left in a prepare() state
 */
int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
                        Error **errp)
{
    int ret = -1;
    Error *local_err = NULL;
    BlockDriver *drv;

    assert(reopen_state != NULL);
    assert(reopen_state->bs->drv != NULL);
    drv = reopen_state->bs->drv;

    /* if we are to stay read-only, do not allow permission change
     * to r/w */
    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
        reopen_state->flags & BDRV_O_RDWR) {
        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
                  bdrv_get_device_name(reopen_state->bs));
        goto error;
    }

    ret = bdrv_flush(reopen_state->bs);
    if (ret) {
        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
                  strerror(-ret));
        goto error;
    }

    if (drv->bdrv_reopen_prepare) {
        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
        if (ret) {
            if (local_err != NULL) {
                error_propagate(errp, local_err);
            } else {
                error_setg(errp, "failed while preparing to reopen image '%s'",
                           reopen_state->bs->filename);
            }
            goto error;
        }
    } else {
        /* It is currently mandatory to have a bdrv_reopen_prepare()
         * handler for each supported drv. */
        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
                  drv->format_name, bdrv_get_device_name(reopen_state->bs),
                  "reopening of file");
        ret = -1;
        goto error;
    }

    ret = 0;

error:
    return ret;
}
/*
 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
 * makes them final by swapping the staging BlockDriverState contents into
 * the active BlockDriverState contents.
 */
void bdrv_reopen_commit(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    /* If there are any driver level actions to take */
    if (drv->bdrv_reopen_commit) {
        drv->bdrv_reopen_commit(reopen_state);
    }

    /* set BDS specific flags now */
    reopen_state->bs->open_flags         = reopen_state->flags;
    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
                                              BDRV_O_CACHE_WB);
    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);

    bdrv_refresh_limits(reopen_state->bs, NULL);
}

/*
 * Abort the reopen, and delete and free the staged changes in
 * reopen_state
 */
void bdrv_reopen_abort(BDRVReopenState *reopen_state)
{
    BlockDriver *drv;

    assert(reopen_state != NULL);
    drv = reopen_state->bs->drv;
    assert(drv != NULL);

    if (drv->bdrv_reopen_abort) {
        drv->bdrv_reopen_abort(reopen_state);
    }
}
void bdrv_close(BlockDriverState *bs)
{
    BdrvAioNotifier *ban, *ban_next;

    if (bs->job) {
        block_job_cancel_sync(bs->job);
    }
    bdrv_drain_all(); /* complete I/O */
    bdrv_flush(bs);
    bdrv_drain_all(); /* in case flush left pending I/O */
    notifier_list_notify(&bs->close_notifiers, bs);

    if (bs->drv) {
        if (bs->backing_hd) {
            BlockDriverState *backing_hd = bs->backing_hd;
            bdrv_set_backing_hd(bs, NULL);
            bdrv_unref(backing_hd);
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
        bs->opaque = NULL;
        bs->drv = NULL;
        bs->copy_on_read = 0;
        bs->backing_file[0] = '\0';
        bs->backing_format[0] = '\0';
        bs->total_sectors = 0;
        bs->encrypted = 0;
        bs->valid_key = 0;
        bs->sg = 0;
        bs->growable = 0;
        bs->zero_beyond_eof = false;
        QDECREF(bs->options);
        bs->options = NULL;
        QDECREF(bs->full_open_options);
        bs->full_open_options = NULL;

        if (bs->file != NULL) {
            bdrv_unref(bs->file);
            bs->file = NULL;
        }
    }

    bdrv_dev_change_media_cb(bs, false);

    /* throttling disk I/O limits */
    if (bs->io_limits_enabled) {
        bdrv_io_limits_disable(bs);
    }

    QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
        g_free(ban);
    }
    QLIST_INIT(&bs->aio_notifiers);
}

void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
        AioContext *aio_context = bdrv_get_aio_context(bs);

        aio_context_acquire(aio_context);
        bdrv_close(bs);
        aio_context_release(aio_context);
    }
}
/* Check if any requests are in-flight (including throttled requests) */
static bool bdrv_requests_pending(BlockDriverState *bs)
{
    if (!QLIST_EMPTY(&bs->tracked_requests)) {
        return true;
    }
    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
        return true;
    }
    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
        return true;
    }
    if (bs->file && bdrv_requests_pending(bs->file)) {
        return true;
    }
    if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
        return true;
    }
    return false;
}

/*
 * Wait for pending requests to complete across all BlockDriverStates
 *
 * This function does not flush data to disk, use bdrv_flush_all() for that
 * after calling this function.
 *
 * Note that completion of an asynchronous I/O operation can trigger any
 * number of other I/O operations on other devices---for example a coroutine
 * can be arbitrarily complex and a constant flow of I/O can come until the
 * coroutine is complete.  Because of this, it is not possible to have a
 * function to drain a single device's I/O queue.
 */
void bdrv_drain_all(void)
{
    /* Always run first iteration so any pending completion BHs run */
    bool busy = true;
    BlockDriverState *bs;

    while (busy) {
        busy = false;

        QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
            AioContext *aio_context = bdrv_get_aio_context(bs);
            bool bs_busy;

            aio_context_acquire(aio_context);
            bdrv_flush_io_queue(bs);
            bdrv_start_throttled_reqs(bs);
            bs_busy = bdrv_requests_pending(bs);
            bs_busy |= aio_poll(aio_context, bs_busy);
            aio_context_release(aio_context);

            busy |= bs_busy;
        }
    }
}
/* Make a BlockDriverState anonymous by removing it from the bdrv_states and
 * graph_bdrv_states lists. Also, clear its node_name to prevent a double
 * remove. */
void bdrv_make_anon(BlockDriverState *bs)
{
    /*
     * Take care to remove bs from bdrv_states only when it's actually
     * in it.  Note that bs->device_list.tqe_prev is initially null,
     * and gets set to non-null by QTAILQ_INSERT_TAIL().  Establish
     * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
     * resetting it to null on remove.
     */
    if (bs->device_list.tqe_prev) {
        QTAILQ_REMOVE(&bdrv_states, bs, device_list);
        bs->device_list.tqe_prev = NULL;
    }
    if (bs->node_name[0] != '\0') {
        QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
    }
    bs->node_name[0] = '\0';
}

static void bdrv_rebind(BlockDriverState *bs)
{
    if (bs->drv && bs->drv->bdrv_rebind) {
        bs->drv->bdrv_rebind(bs);
    }
}
static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                     BlockDriverState *bs_src)
{
    /* move some fields that need to stay attached to the device */

    /* dev info */
    bs_dest->dev_ops            = bs_src->dev_ops;
    bs_dest->dev_opaque         = bs_src->dev_opaque;
    bs_dest->dev                = bs_src->dev;
    bs_dest->guest_block_size   = bs_src->guest_block_size;
    bs_dest->copy_on_read       = bs_src->copy_on_read;

    bs_dest->enable_write_cache = bs_src->enable_write_cache;

    /* i/o throttled req */
    memcpy(&bs_dest->throttle_state,
           &bs_src->throttle_state,
           sizeof(ThrottleState));
    bs_dest->throttled_reqs[0]  = bs_src->throttled_reqs[0];
    bs_dest->throttled_reqs[1]  = bs_src->throttled_reqs[1];
    bs_dest->io_limits_enabled  = bs_src->io_limits_enabled;

    /* r/w error */
    bs_dest->on_read_error      = bs_src->on_read_error;
    bs_dest->on_write_error     = bs_src->on_write_error;

    /* i/o status */
    bs_dest->iostatus_enabled   = bs_src->iostatus_enabled;
    bs_dest->iostatus           = bs_src->iostatus;

    /* dirty bitmap */
    bs_dest->dirty_bitmaps      = bs_src->dirty_bitmaps;

    /* reference count */
    bs_dest->refcnt             = bs_src->refcnt;

    /* job */
    bs_dest->job                = bs_src->job;

    /* keep the same entry in bdrv_states */
    bs_dest->device_list = bs_src->device_list;
    bs_dest->blk = bs_src->blk;

    memcpy(bs_dest->op_blockers, bs_src->op_blockers,
           sizeof(bs_dest->op_blockers));
}
2038 * Swap bs contents for two image chains while they are live,
2039 * while keeping required fields on the BlockDriverState that is
2040 * actually attached to a device.
2042 * This will modify the BlockDriverState fields, and swap contents
2043 * between bs_new and bs_old. Both bs_new and bs_old are modified.
2045 * bs_new must not be attached to a BlockBackend.
2047 * This function does not create any image files.
2049 void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
2051 BlockDriverState tmp;
2053 /* The code needs to swap the node_name but simply swapping node_list won't
2054 * work so first remove the nodes from the graph list, do the swap then
2055 * insert them back if needed.
2057 if (bs_new->node_name[0] != '\0') {
2058 QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
2060 if (bs_old->node_name[0] != '\0') {
2061 QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
2064 /* bs_new must be unattached and shouldn't have anything fancy enabled */
2065 assert(!bs_new->blk);
2066 assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
2067 assert(bs_new->job == NULL);
2068 assert(bs_new->dev == NULL);
2069 assert(bs_new->io_limits_enabled == false);
2070 assert(!throttle_have_timer(&bs_new->throttle_state));
2072 tmp = *bs_new;
2073 *bs_new = *bs_old;
2074 *bs_old = tmp;
2076 /* there are some fields that should not be swapped, move them back */
2077 bdrv_move_feature_fields(&tmp, bs_old);
2078 bdrv_move_feature_fields(bs_old, bs_new);
2079 bdrv_move_feature_fields(bs_new, &tmp);
2081 /* bs_new must remain unattached */
2082 assert(!bs_new->blk);
2084 /* Check a few fields that should remain attached to the device */
2085 assert(bs_new->dev == NULL);
2086 assert(bs_new->job == NULL);
2087 assert(bs_new->io_limits_enabled == false);
2088 assert(!throttle_have_timer(&bs_new->throttle_state));
2090 /* insert the nodes back into the graph node list if needed */
2091 if (bs_new->node_name[0] != '\0') {
2092 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
2094 if (bs_old->node_name[0] != '\0') {
2095 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
2098 bdrv_rebind(bs_new);
2099 bdrv_rebind(bs_old);
2103 * Add new bs contents at the top of an image chain while the chain is
2104 * live, while keeping required fields on the top layer.
2106 * This will modify the BlockDriverState fields, and swap contents
2107 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2109 * bs_new must not be attached to a BlockBackend.
2111 * This function does not create any image files.
2113 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2115 bdrv_swap(bs_new, bs_top);
2117 /* After the swap, bs_top carries the new image's contents and bs_new
2118 * holds the old top, which now becomes bs_top's backing file. */
2119 bdrv_set_backing_hd(bs_top, bs_new);
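/*
 * Usage sketch (illustrative only, not part of the build): this is the
 * pattern a live-snapshot caller might follow, assuming 'new_bs' is a
 * freshly opened, unattached overlay image and 'bs' is the BDS currently
 * attached to the guest device:
 *
 *     bdrv_append(new_bs, bs);
 *     // From here on, the pointer 'bs' still names the device-attached
 *     // state but now carries the new overlay's contents, and 'new_bs'
 *     // holds the old top of the chain as its backing file.
 */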
2122 static void bdrv_delete(BlockDriverState *bs)
2124 assert(!bs->dev);
2125 assert(!bs->job);
2126 assert(bdrv_op_blocker_is_empty(bs));
2127 assert(!bs->refcnt);
2128 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2130 bdrv_close(bs);
2132 /* remove from list, if necessary */
2133 bdrv_make_anon(bs);
2135 g_free(bs);
2138 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
2139 /* TODO change to DeviceState *dev when all users are qdevified */
2141 if (bs->dev) {
2142 return -EBUSY;
2144 bs->dev = dev;
2145 bdrv_iostatus_reset(bs);
2147 /* We're expecting I/O from the device so bump up coroutine pool size */
2148 qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
2149 return 0;
2152 /* TODO qdevified devices don't use this, remove when devices are qdevified */
2153 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
2155 if (bdrv_attach_dev(bs, dev) < 0) {
2156 abort();
2160 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
2161 /* TODO change to DeviceState *dev when all users are qdevified */
2163 assert(bs->dev == dev);
2164 bs->dev = NULL;
2165 bs->dev_ops = NULL;
2166 bs->dev_opaque = NULL;
2167 bs->guest_block_size = 512;
2168 qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
2171 /* TODO change to return DeviceState * when all users are qdevified */
2172 void *bdrv_get_attached_dev(BlockDriverState *bs)
2174 return bs->dev;
2177 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
2178 void *opaque)
2180 bs->dev_ops = ops;
2181 bs->dev_opaque = opaque;
2184 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
2186 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
2187 bool tray_was_closed = !bdrv_dev_is_tray_open(bs);
2188 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
2189 if (tray_was_closed) {
2190 /* tray open */
2191 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2192 true, &error_abort);
2194 if (load) {
2195 /* tray close */
2196 qapi_event_send_device_tray_moved(bdrv_get_device_name(bs),
2197 false, &error_abort);
2202 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
2204 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
2207 void bdrv_dev_eject_request(BlockDriverState *bs, bool force)
2209 if (bs->dev_ops && bs->dev_ops->eject_request_cb) {
2210 bs->dev_ops->eject_request_cb(bs->dev_opaque, force);
2214 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
2216 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
2217 return bs->dev_ops->is_tray_open(bs->dev_opaque);
2219 return false;
2222 static void bdrv_dev_resize_cb(BlockDriverState *bs)
2224 if (bs->dev_ops && bs->dev_ops->resize_cb) {
2225 bs->dev_ops->resize_cb(bs->dev_opaque);
2229 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
2231 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
2232 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
2234 return false;
2238 * Run consistency checks on an image
2240 * Returns 0 if the check could be completed (it doesn't mean that the image is
2241 * free of errors) or -errno when an internal error occurred. The results of the
2242 * check are stored in res.
2244 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2246 if (bs->drv == NULL) {
2247 return -ENOMEDIUM;
2249 if (bs->drv->bdrv_check == NULL) {
2250 return -ENOTSUP;
2253 memset(res, 0, sizeof(*res));
2254 return bs->drv->bdrv_check(bs, res, fix);
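/*
 * Usage sketch (illustrative): run a repairing check and distinguish
 * "the check itself failed" from "the check ran and found problems":
 *
 *     BdrvCheckResult result;
 *     int ret = bdrv_check(bs, &result, BDRV_FIX_ERRORS);
 *     if (ret < 0) {
 *         // could not check at all: -ENOMEDIUM, -ENOTSUP, ...
 *     } else if (result.corruptions || result.check_errors) {
 *         // check completed, but the image has (unfixed) problems
 *     }
 */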
2257 #define COMMIT_BUF_SECTORS 2048
2259 /* commit COW file into the raw image */
2260 int bdrv_commit(BlockDriverState *bs)
2262 BlockDriver *drv = bs->drv;
2263 int64_t sector, total_sectors, length, backing_length;
2264 int n, ro, open_flags;
2265 int ret = 0;
2266 uint8_t *buf = NULL;
2267 char filename[PATH_MAX];
2269 if (!drv)
2270 return -ENOMEDIUM;
2272 if (!bs->backing_hd) {
2273 return -ENOTSUP;
2276 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
2277 bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
2278 return -EBUSY;
2281 ro = bs->backing_hd->read_only;
2282 /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
2283 pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
2284 open_flags = bs->backing_hd->open_flags;
2286 if (ro) {
2287 if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
2288 return -EACCES;
2292 length = bdrv_getlength(bs);
2293 if (length < 0) {
2294 ret = length;
2295 goto ro_cleanup;
2298 backing_length = bdrv_getlength(bs->backing_hd);
2299 if (backing_length < 0) {
2300 ret = backing_length;
2301 goto ro_cleanup;
2304 /* If our top snapshot is larger than the backing file image,
2305 * grow the backing file image if possible. If not possible,
2306 * we must return an error */
2307 if (length > backing_length) {
2308 ret = bdrv_truncate(bs->backing_hd, length);
2309 if (ret < 0) {
2310 goto ro_cleanup;
2314 total_sectors = length >> BDRV_SECTOR_BITS;
2316 /* qemu_try_blockalign() for bs will choose an alignment that works for
2317 * bs->backing_hd as well, so no need to compare the alignment manually. */
2318 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2319 if (buf == NULL) {
2320 ret = -ENOMEM;
2321 goto ro_cleanup;
2324 for (sector = 0; sector < total_sectors; sector += n) {
2325 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2326 if (ret < 0) {
2327 goto ro_cleanup;
2329 if (ret) {
2330 ret = bdrv_read(bs, sector, buf, n);
2331 if (ret < 0) {
2332 goto ro_cleanup;
2335 ret = bdrv_write(bs->backing_hd, sector, buf, n);
2336 if (ret < 0) {
2337 goto ro_cleanup;
2342 if (drv->bdrv_make_empty) {
2343 ret = drv->bdrv_make_empty(bs);
2344 if (ret < 0) {
2345 goto ro_cleanup;
2347 bdrv_flush(bs);
2351 * Make sure all data we wrote to the backing device is actually
2352 * stable on disk.
2354 if (bs->backing_hd) {
2355 bdrv_flush(bs->backing_hd);
2358 ret = 0;
2359 ro_cleanup:
2360 qemu_vfree(buf);
2362 if (ro) {
2363 /* ignoring error return here */
2364 bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
2367 return ret;
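/*
 * Illustration (not code): committing collapses one COW layer into its
 * backing file. For a chain
 *
 *     base.img <- overlay.qcow2 (== bs)
 *
 * bdrv_commit(bs) copies every sector that is allocated in overlay.qcow2
 * down into base.img, then empties the overlay if the driver supports
 * bdrv_make_empty, so subsequent reads fall through to the (now updated)
 * backing file.
 */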
2370 int bdrv_commit_all(void)
2372 BlockDriverState *bs;
2374 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2375 AioContext *aio_context = bdrv_get_aio_context(bs);
2377 aio_context_acquire(aio_context);
2378 if (bs->drv && bs->backing_hd) {
2379 int ret = bdrv_commit(bs);
2380 if (ret < 0) {
2381 aio_context_release(aio_context);
2382 return ret;
2385 aio_context_release(aio_context);
2387 return 0;
2391 * Remove an active request from the tracked requests list
2393 * This function should be called when a tracked request is completing.
2395 static void tracked_request_end(BdrvTrackedRequest *req)
2397 if (req->serialising) {
2398 req->bs->serialising_in_flight--;
2401 QLIST_REMOVE(req, list);
2402 qemu_co_queue_restart_all(&req->wait_queue);
2406 * Add an active request to the tracked requests list
2408 static void tracked_request_begin(BdrvTrackedRequest *req,
2409 BlockDriverState *bs,
2410 int64_t offset,
2411 unsigned int bytes, bool is_write)
2413 *req = (BdrvTrackedRequest){
2414 .bs = bs,
2415 .offset = offset,
2416 .bytes = bytes,
2417 .is_write = is_write,
2418 .co = qemu_coroutine_self(),
2419 .serialising = false,
2420 .overlap_offset = offset,
2421 .overlap_bytes = bytes,
2424 qemu_co_queue_init(&req->wait_queue);
2426 QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
2429 static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
2431 int64_t overlap_offset = req->offset & ~(align - 1);
2432 unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
2433 - overlap_offset;
2435 if (!req->serialising) {
2436 req->bs->serialising_in_flight++;
2437 req->serialising = true;
2440 req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
2441 req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
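/*
 * Worked example (illustrative): with align = 4096, a request at
 * offset 4100 with bytes = 100 gets
 *
 *     overlap_offset = 4100 & ~4095                = 4096
 *     overlap_bytes  = ROUND_UP(4200, 4096) - 4096 = 4096
 *
 * i.e. the serialising window is widened to the containing aligned block,
 * so any overlapping aligned request is reliably detected.
 */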
2445 * Round a region to cluster boundaries
2447 void bdrv_round_to_clusters(BlockDriverState *bs,
2448 int64_t sector_num, int nb_sectors,
2449 int64_t *cluster_sector_num,
2450 int *cluster_nb_sectors)
2452 BlockDriverInfo bdi;
2454 if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
2455 *cluster_sector_num = sector_num;
2456 *cluster_nb_sectors = nb_sectors;
2457 } else {
2458 int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
2459 *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
2460 *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
2461 nb_sectors, c);
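/*
 * Worked example (illustrative): for a 64 KiB cluster size
 * (c = 65536 / 512 = 128 sectors), sector_num = 100 and nb_sectors = 60
 * yield
 *
 *     *cluster_sector_num = QEMU_ALIGN_DOWN(100, 128)        = 0
 *     *cluster_nb_sectors = QEMU_ALIGN_UP(100 - 0 + 60, 128) = 256
 *
 * because the region [100, 160) straddles the boundary between the first
 * and second clusters.
 */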
2465 static int bdrv_get_cluster_size(BlockDriverState *bs)
2467 BlockDriverInfo bdi;
2468 int ret;
2470 ret = bdrv_get_info(bs, &bdi);
2471 if (ret < 0 || bdi.cluster_size == 0) {
2472 return bs->request_alignment;
2473 } else {
2474 return bdi.cluster_size;
2478 static bool tracked_request_overlaps(BdrvTrackedRequest *req,
2479 int64_t offset, unsigned int bytes)
2481 /* aaaa bbbb */
2482 if (offset >= req->overlap_offset + req->overlap_bytes) {
2483 return false;
2485 /* bbbb aaaa */
2486 if (req->overlap_offset >= offset + bytes) {
2487 return false;
2489 return true;
2492 static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
2494 BlockDriverState *bs = self->bs;
2495 BdrvTrackedRequest *req;
2496 bool retry;
2497 bool waited = false;
2499 if (!bs->serialising_in_flight) {
2500 return false;
2503 do {
2504 retry = false;
2505 QLIST_FOREACH(req, &bs->tracked_requests, list) {
2506 if (req == self || (!req->serialising && !self->serialising)) {
2507 continue;
2509 if (tracked_request_overlaps(req, self->overlap_offset,
2510 self->overlap_bytes))
2512 /* Hitting this means there was a reentrant request, for
2513 * example, a block driver issuing nested requests. This must
2514 * never happen since it means deadlock.
2516 assert(qemu_coroutine_self() != req->co);
2518 /* If the request is already (indirectly) waiting for us, or
2519 * will wait for us as soon as it wakes up, then just go on
2520 * (instead of producing a deadlock in the former case). */
2521 if (!req->waiting_for) {
2522 self->waiting_for = req;
2523 qemu_co_queue_wait(&req->wait_queue);
2524 self->waiting_for = NULL;
2525 retry = true;
2526 waited = true;
2527 break;
2531 } while (retry);
2533 return waited;
2537 * Return values:
2538 * 0 - success
2539 * -EINVAL - backing format specified, but no file
2540 * -ENOSPC - can't update the backing file because no space is left in the
2541 * image file header
2542 * -ENOTSUP - format driver doesn't support changing the backing file
2544 int bdrv_change_backing_file(BlockDriverState *bs,
2545 const char *backing_file, const char *backing_fmt)
2547 BlockDriver *drv = bs->drv;
2548 int ret;
2550 /* Backing file format doesn't make sense without a backing file */
2551 if (backing_fmt && !backing_file) {
2552 return -EINVAL;
2555 if (drv->bdrv_change_backing_file != NULL) {
2556 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2557 } else {
2558 ret = -ENOTSUP;
2561 if (ret == 0) {
2562 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2563 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2565 return ret;
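/*
 * Usage sketch (illustrative only; the file name is hypothetical):
 *
 *     int ret = bdrv_change_backing_file(bs, "new-base.qcow2", "qcow2");
 *     if (ret == -ENOTSUP) {
 *         // format driver cannot rewrite its backing file reference
 *     }
 *
 * Passing backing_file == NULL (with backing_fmt == NULL) asks the driver
 * to drop the backing file reference entirely.
 */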
2569 * Finds the image layer in the chain that has 'bs' as its backing file.
2571 * active is the current topmost image.
2573 * Returns NULL if bs is not found in active's image chain,
2574 * or if active == bs.
2576 * Returns the bottommost base image if bs == NULL.
2578 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2579 BlockDriverState *bs)
2581 while (active && bs != active->backing_hd) {
2582 active = active->backing_hd;
2585 return active;
2588 /* Given a BDS, searches for the base layer. */
2589 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2591 return bdrv_find_overlay(bs, NULL);
2594 typedef struct BlkIntermediateStates {
2595 BlockDriverState *bs;
2596 QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
2597 } BlkIntermediateStates;
2601 * Drops images above 'base' up to and including 'top', and sets the image
2602 * above 'top' to have base as its backing file.
2604 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2605 * information in 'bs' can be properly updated.
2607 * E.g., this will convert the following chain:
2608 * bottom <- base <- intermediate <- top <- active
2610 * to
2612 * bottom <- base <- active
2614 * It is allowed for bottom==base, in which case it converts:
2616 * base <- intermediate <- top <- active
2618 * to
2620 * base <- active
2622 * If backing_file_str is non-NULL, it will be used when modifying top's
2623 * overlay image metadata.
2625 * Error conditions:
2626 * if active == top, that is considered an error
2629 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2630 BlockDriverState *base, const char *backing_file_str)
2632 BlockDriverState *intermediate;
2633 BlockDriverState *base_bs = NULL;
2634 BlockDriverState *new_top_bs = NULL;
2635 BlkIntermediateStates *intermediate_state, *next;
2636 int ret = -EIO;
2638 QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
2639 QSIMPLEQ_INIT(&states_to_delete);
2641 if (!top->drv || !base->drv) {
2642 goto exit;
2645 new_top_bs = bdrv_find_overlay(active, top);
2647 if (new_top_bs == NULL) {
2648 /* we could not find the image above 'top', this is an error */
2649 goto exit;
2652 /* special case of new_top_bs->backing_hd already pointing to base - nothing
2653 * to do, no intermediate images */
2654 if (new_top_bs->backing_hd == base) {
2655 ret = 0;
2656 goto exit;
2659 intermediate = top;
2661 /* Now walk down the chain, adding each BDS we find
2662 * to the deletion queue, until we hit the 'base'
2664 while (intermediate) {
2665 intermediate_state = g_new0(BlkIntermediateStates, 1);
2666 intermediate_state->bs = intermediate;
2667 QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
2669 if (intermediate->backing_hd == base) {
2670 base_bs = intermediate->backing_hd;
2671 break;
2673 intermediate = intermediate->backing_hd;
2675 if (base_bs == NULL) {
2676 /* Something went wrong: we did not end at the base. Safely
2677 * unravel everything, and exit with an error */
2678 goto exit;
2681 /* success - we can delete the intermediate states, and link top->base */
2682 backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
2683 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2684 base_bs->drv ? base_bs->drv->format_name : "");
2685 if (ret) {
2686 goto exit;
2688 bdrv_set_backing_hd(new_top_bs, base_bs);
2690 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2691 /* so that bdrv_close() does not recursively close the chain */
2692 bdrv_set_backing_hd(intermediate_state->bs, NULL);
2693 bdrv_unref(intermediate_state->bs);
2695 ret = 0;
2697 exit:
2698 QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
2699 g_free(intermediate_state);
2701 return ret;
2705 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
2706 size_t size)
2708 int64_t len;
2710 if (size > INT_MAX) {
2711 return -EIO;
2714 if (!bdrv_is_inserted(bs))
2715 return -ENOMEDIUM;
2717 if (bs->growable)
2718 return 0;
2720 len = bdrv_getlength(bs);
2722 if (offset < 0)
2723 return -EIO;
2725 if ((offset > len) || (len - offset < size))
2726 return -EIO;
2728 return 0;
2731 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
2732 int nb_sectors)
2734 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2735 return -EIO;
2738 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
2739 nb_sectors * BDRV_SECTOR_SIZE);
2742 typedef struct RwCo {
2743 BlockDriverState *bs;
2744 int64_t offset;
2745 QEMUIOVector *qiov;
2746 bool is_write;
2747 int ret;
2748 BdrvRequestFlags flags;
2749 } RwCo;
2751 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
2753 RwCo *rwco = opaque;
2755 if (!rwco->is_write) {
2756 rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
2757 rwco->qiov->size, rwco->qiov,
2758 rwco->flags);
2759 } else {
2760 rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
2761 rwco->qiov->size, rwco->qiov,
2762 rwco->flags);
2767 * Process a vectored synchronous request using coroutines
2769 static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
2770 QEMUIOVector *qiov, bool is_write,
2771 BdrvRequestFlags flags)
2773 Coroutine *co;
2774 RwCo rwco = {
2775 .bs = bs,
2776 .offset = offset,
2777 .qiov = qiov,
2778 .is_write = is_write,
2779 .ret = NOT_DONE,
2780 .flags = flags,
2784 * In a synchronous call context, while the vcpu is blocked, the throttling
2785 * timer will not fire; so I/O throttling has to be disabled here
2786 * if it has been enabled.
2788 if (bs->io_limits_enabled) {
2789 fprintf(stderr, "Disabling I/O throttling on '%s' due "
2790 "to synchronous I/O.\n", bdrv_get_device_name(bs));
2791 bdrv_io_limits_disable(bs);
2794 if (qemu_in_coroutine()) {
2795 /* Fast-path if already in coroutine context */
2796 bdrv_rw_co_entry(&rwco);
2797 } else {
2798 AioContext *aio_context = bdrv_get_aio_context(bs);
2800 co = qemu_coroutine_create(bdrv_rw_co_entry);
2801 qemu_coroutine_enter(co, &rwco);
2802 while (rwco.ret == NOT_DONE) {
2803 aio_poll(aio_context, true);
2806 return rwco.ret;
2810 * Process a synchronous request using coroutines
2812 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
2813 int nb_sectors, bool is_write, BdrvRequestFlags flags)
2815 QEMUIOVector qiov;
2816 struct iovec iov = {
2817 .iov_base = (void *)buf,
2818 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
2821 if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
2822 return -EINVAL;
2825 qemu_iovec_init_external(&qiov, &iov, 1);
2826 return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
2827 &qiov, is_write, flags);
2830 /* return < 0 if error. See bdrv_write() for the return codes */
2831 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
2832 uint8_t *buf, int nb_sectors)
2834 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
2837 /* Just like bdrv_read(), but with I/O throttling temporarily disabled */
2838 int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
2839 uint8_t *buf, int nb_sectors)
2841 bool enabled;
2842 int ret;
2844 enabled = bs->io_limits_enabled;
2845 bs->io_limits_enabled = false;
2846 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
2847 bs->io_limits_enabled = enabled;
2848 return ret;
2851 /* Return < 0 if error. Important errors are:
2852 -EIO generic I/O error (may happen for all errors)
2853 -ENOMEDIUM No media inserted.
2854 -EINVAL Invalid sector number or nb_sectors
2855 -EACCES Trying to write a read-only device
2857 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
2858 const uint8_t *buf, int nb_sectors)
2860 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
2863 int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
2864 int nb_sectors, BdrvRequestFlags flags)
2866 return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
2867 BDRV_REQ_ZERO_WRITE | flags);
2871 * Completely zero out a block device with the help of bdrv_write_zeroes.
2872 * The operation is sped up by checking the block status and only writing
2873 * zeroes to the device if they currently do not return zeroes. Optional
2874 * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
2876 * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
2878 int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
2880 int64_t target_sectors, ret, nb_sectors, sector_num = 0;
2881 int n;
2883 target_sectors = bdrv_nb_sectors(bs);
2884 if (target_sectors < 0) {
2885 return target_sectors;
2888 for (;;) {
2889 nb_sectors = target_sectors - sector_num;
2890 if (nb_sectors <= 0) {
2891 return 0;
2893 if (nb_sectors > INT_MAX) {
2894 nb_sectors = INT_MAX;
2896 ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
2897 if (ret < 0) {
2898 error_report("error getting block status at sector %" PRId64 ": %s",
2899 sector_num, strerror(-ret));
2900 return ret;
2902 if (ret & BDRV_BLOCK_ZERO) {
2903 sector_num += n;
2904 continue;
2906 ret = bdrv_write_zeroes(bs, sector_num, n, flags);
2907 if (ret < 0) {
2908 error_report("error writing zeroes at sector %" PRId64 ": %s",
2909 sector_num, strerror(-ret));
2910 return ret;
2912 sector_num += n;
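/*
 * Usage sketch (illustrative): zero an entire image, allowing the driver
 * to punch holes where it can instead of writing explicit zeroes:
 *
 *     int ret = bdrv_make_zero(bs, BDRV_REQ_MAY_UNMAP);
 *     if (ret < 0) {
 *         // the error has already been reported above at sector granularity
 *     }
 */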
2916 int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
2918 QEMUIOVector qiov;
2919 struct iovec iov = {
2920 .iov_base = (void *)buf,
2921 .iov_len = bytes,
2923 int ret;
2925 if (bytes < 0) {
2926 return -EINVAL;
2929 qemu_iovec_init_external(&qiov, &iov, 1);
2930 ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
2931 if (ret < 0) {
2932 return ret;
2935 return bytes;
2938 int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
2940 int ret;
2942 ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
2943 if (ret < 0) {
2944 return ret;
2947 return qiov->size;
2950 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
2951 const void *buf, int bytes)
2953 QEMUIOVector qiov;
2954 struct iovec iov = {
2955 .iov_base = (void *) buf,
2956 .iov_len = bytes,
2959 if (bytes < 0) {
2960 return -EINVAL;
2963 qemu_iovec_init_external(&qiov, &iov, 1);
2964 return bdrv_pwritev(bs, offset, &qiov);
2968 * Writes to the file and ensures that no writes are reordered across this
2969 * request (acts as a barrier)
2971 * Returns 0 on success, -errno in error cases.
2973 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
2974 const void *buf, int count)
2976 int ret;
2978 ret = bdrv_pwrite(bs, offset, buf, count);
2979 if (ret < 0) {
2980 return ret;
2983 /* No flush needed for cache modes that already do it */
2984 if (bs->enable_write_cache) {
2985 bdrv_flush(bs);
2988 return 0;
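/*
 * Usage sketch (illustrative; 'header' is a hypothetical on-disk
 * structure): format drivers use this pattern when updating metadata that
 * must hit the disk before any dependent write is allowed through:
 *
 *     ret = bdrv_pwrite_sync(bs->file, 0, &header, sizeof(header));
 *     if (ret < 0) {
 *         return ret;
 *     }
 */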
2991 static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
2992 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
2994 /* Perform I/O through a temporary buffer so that users who scribble over
2995 * their read buffer while the operation is in progress do not end up
2996 * modifying the image file. This is critical for zero-copy guest I/O
2997 * where anything might happen inside guest memory.
2999 void *bounce_buffer;
3001 BlockDriver *drv = bs->drv;
3002 struct iovec iov;
3003 QEMUIOVector bounce_qiov;
3004 int64_t cluster_sector_num;
3005 int cluster_nb_sectors;
3006 size_t skip_bytes;
3007 int ret;
3009 /* Cover the entire cluster so no additional backing file I/O is required
3010 * when allocating the cluster in the image file.
3012 bdrv_round_to_clusters(bs, sector_num, nb_sectors,
3013 &cluster_sector_num, &cluster_nb_sectors);
3015 trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
3016 cluster_sector_num, cluster_nb_sectors);
3018 iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
3019 iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
3020 if (bounce_buffer == NULL) {
3021 ret = -ENOMEM;
3022 goto err;
3025 qemu_iovec_init_external(&bounce_qiov, &iov, 1);
3027 ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
3028 &bounce_qiov);
3029 if (ret < 0) {
3030 goto err;
3033 if (drv->bdrv_co_write_zeroes &&
3034 buffer_is_zero(bounce_buffer, iov.iov_len)) {
3035 ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
3036 cluster_nb_sectors, 0);
3037 } else {
3038 /* This does not change the data on the disk, it is not necessary
3039 * to flush even in cache=writethrough mode.
3041 ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
3042 &bounce_qiov);
3045 if (ret < 0) {
3046 /* It might be okay to ignore write errors for guest requests. If this
3047 * is a deliberate copy-on-read then we don't want to ignore the error.
3048 * Simply report it in all cases.
3050 goto err;
3053 skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
3054 qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
3055 nb_sectors * BDRV_SECTOR_SIZE);
3057 err:
3058 qemu_vfree(bounce_buffer);
3059 return ret;
3063 * Forwards an already correctly aligned request to the BlockDriver. This
3064 * handles copy on read and zeroing after EOF; any other features must be
3065 * implemented by the caller.
3067 static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
3068 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3069 int64_t align, QEMUIOVector *qiov, int flags)
3071 BlockDriver *drv = bs->drv;
3072 int ret;
3074 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3075 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3077 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3078 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
3079 assert(!qiov || bytes == qiov->size);
3081 /* Handle Copy on Read and associated serialisation */
3082 if (flags & BDRV_REQ_COPY_ON_READ) {
3083 /* If we touch the same cluster it counts as an overlap. This
3084 * guarantees that allocating writes will be serialized and not race
3085 * with each other for the same cluster. For example, in copy-on-read
3086 * it ensures that the CoR read and write operations are atomic and
3087 * guest writes cannot interleave between them. */
3088 mark_request_serialising(req, bdrv_get_cluster_size(bs));
3091 wait_serialising_requests(req);
3093 if (flags & BDRV_REQ_COPY_ON_READ) {
3094 int pnum;
3096 ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
3097 if (ret < 0) {
3098 goto out;
3101 if (!ret || pnum != nb_sectors) {
3102 ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
3103 goto out;
3107 /* Forward the request to the BlockDriver */
3108 if (!(bs->zero_beyond_eof && bs->growable)) {
3109 ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
3110 } else {
3111 /* Read zeroes after EOF of growable BDSes */
3112 int64_t total_sectors, max_nb_sectors;
3114 total_sectors = bdrv_nb_sectors(bs);
3115 if (total_sectors < 0) {
3116 ret = total_sectors;
3117 goto out;
3120 max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
3121 align >> BDRV_SECTOR_BITS);
3122 if (max_nb_sectors > 0) {
3123 QEMUIOVector local_qiov;
3124 size_t local_sectors;
3126 max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_SIZE);
3127 local_sectors = MIN(max_nb_sectors, nb_sectors);
3129 qemu_iovec_init(&local_qiov, qiov->niov);
3130 qemu_iovec_concat(&local_qiov, qiov, 0,
3131 local_sectors * BDRV_SECTOR_SIZE);
3133 ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
3134 &local_qiov);
3136 qemu_iovec_destroy(&local_qiov);
3137 } else {
3138 ret = 0;
3141 /* Reading beyond end of file is supposed to produce zeroes */
3142 if (ret == 0 && total_sectors < sector_num + nb_sectors) {
3143 uint64_t offset = MAX(0, total_sectors - sector_num);
3144 uint64_t bytes = (sector_num + nb_sectors - offset) *
3145 BDRV_SECTOR_SIZE;
3146 qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
3150 out:
3151 return ret;
3155 * Handle a read request in coroutine context
3157 static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
3158 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3159 BdrvRequestFlags flags)
3161 BlockDriver *drv = bs->drv;
3162 BdrvTrackedRequest req;
3164 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3165 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3166 uint8_t *head_buf = NULL;
3167 uint8_t *tail_buf = NULL;
3168 QEMUIOVector local_qiov;
3169 bool use_local_qiov = false;
3170 int ret;
3172 if (!drv) {
3173 return -ENOMEDIUM;
3175 if (bdrv_check_byte_request(bs, offset, bytes)) {
3176 return -EIO;
3179 if (bs->copy_on_read) {
3180 flags |= BDRV_REQ_COPY_ON_READ;
3183 /* throttling disk I/O */
3184 if (bs->io_limits_enabled) {
3185 bdrv_io_limits_intercept(bs, bytes, false);
3188 /* Align read if necessary by padding qiov */
3189 if (offset & (align - 1)) {
3190 head_buf = qemu_blockalign(bs, align);
3191 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3192 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3193 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3194 use_local_qiov = true;
3196 bytes += offset & (align - 1);
3197 offset = offset & ~(align - 1);
3200 if ((offset + bytes) & (align - 1)) {
3201 if (!use_local_qiov) {
3202 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3203 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3204 use_local_qiov = true;
3206 tail_buf = qemu_blockalign(bs, align);
3207 qemu_iovec_add(&local_qiov, tail_buf,
3208 align - ((offset + bytes) & (align - 1)));
3210 bytes = ROUND_UP(bytes, align);
3213 tracked_request_begin(&req, bs, offset, bytes, false);
3214 ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
3215 use_local_qiov ? &local_qiov : qiov,
3216 flags);
3217 tracked_request_end(&req);
3219 if (use_local_qiov) {
3220 qemu_iovec_destroy(&local_qiov);
3221 qemu_vfree(head_buf);
3222 qemu_vfree(tail_buf);
3225 return ret;
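/*
 * Worked example (illustrative): with align = 512, an unaligned read of
 * 100 bytes at offset 1000 is widened as follows:
 *
 *     head pad : 1000 & 511               = 488 bytes
 *     offset   : 1000 & ~511              = 512
 *     bytes    : ROUND_UP(100 + 488, 512) = 1024 (436-byte tail pad)
 *
 * so the driver sees one aligned request for [512, 1536) and the caller's
 * qiov receives only the original [1000, 1100) slice.
 */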
3228 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
3229 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3230 BdrvRequestFlags flags)
3232 if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
3233 return -EINVAL;
3236 return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
3237 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3240 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
3241 int nb_sectors, QEMUIOVector *qiov)
3243 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
3245 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
3248 int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
3249 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
3251 trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
3253 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
3254 BDRV_REQ_COPY_ON_READ);
3257 /* If no limit is specified in the BlockLimits, use a default
3258 * of 32768 512-byte sectors (16 MiB) per request.
3260 #define MAX_WRITE_ZEROES_DEFAULT 32768
3262 static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
3263 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
3265 BlockDriver *drv = bs->drv;
3266 QEMUIOVector qiov;
3267 struct iovec iov = {0};
3268 int ret = 0;
3270 int max_write_zeroes = bs->bl.max_write_zeroes ?
3271 bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
3273 while (nb_sectors > 0 && !ret) {
3274 int num = nb_sectors;
3276 /* Align request. Block drivers can expect the "bulk" of the request
3277 * to be aligned.
3279 if (bs->bl.write_zeroes_alignment
3280 && num > bs->bl.write_zeroes_alignment) {
3281 if (sector_num % bs->bl.write_zeroes_alignment != 0) {
3282 /* Make a small request up to the first aligned sector. */
3283 num = bs->bl.write_zeroes_alignment;
3284 num -= sector_num % bs->bl.write_zeroes_alignment;
3285 } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
3286 /* Shorten the request to the last aligned sector. num cannot
3287 * underflow because num > bs->bl.write_zeroes_alignment.
3289 num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
3293 /* limit request size */
3294 if (num > max_write_zeroes) {
3295 num = max_write_zeroes;
3298 ret = -ENOTSUP;
3299 /* First try the efficient write zeroes operation */
3300 if (drv->bdrv_co_write_zeroes) {
3301 ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
3304 if (ret == -ENOTSUP) {
3305 /* Fall back to bounce buffer if write zeroes is unsupported */
3306 iov.iov_len = num * BDRV_SECTOR_SIZE;
3307 if (iov.iov_base == NULL) {
3308 iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
3309 if (iov.iov_base == NULL) {
3310 ret = -ENOMEM;
3311 goto fail;
3313 memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
3315 qemu_iovec_init_external(&qiov, &iov, 1);
3317 ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
3319 /* Keep the bounce buffer around if it is big enough for
3320 * all future requests.
3322 if (num < max_write_zeroes) {
3323 qemu_vfree(iov.iov_base);
3324 iov.iov_base = NULL;
3328 sector_num += num;
3329 nb_sectors -= num;
3332 fail:
3333 qemu_vfree(iov.iov_base);
3334 return ret;
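/*
 * Worked example (illustrative): with bl.write_zeroes_alignment = 128 and
 * a request for sectors [100, 1100), the loop issues
 *
 *     [100, 128)    28 sectors, up to the first aligned boundary
 *     [128, 1024)   896 sectors, fully aligned bulk
 *     [1024, 1100)  76 sectors, unaligned tail
 *
 * (assuming max_write_zeroes does not further limit the middle chunk).
 */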
3338 * Forwards an already correctly aligned write request to the BlockDriver.
3340 static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
3341 BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
3342 QEMUIOVector *qiov, int flags)
3344 BlockDriver *drv = bs->drv;
3345 bool waited;
3346 int ret;
3348 int64_t sector_num = offset >> BDRV_SECTOR_BITS;
3349 unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
3351 assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
3352 assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
3353 assert(!qiov || bytes == qiov->size);
3355 waited = wait_serialising_requests(req);
3356 assert(!waited || !req->serialising);
3357 assert(req->overlap_offset <= offset);
3358 assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
3360 ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
3362 if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
3363 !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
3364 qemu_iovec_is_zero(qiov)) {
3365 flags |= BDRV_REQ_ZERO_WRITE;
3366 if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
3367 flags |= BDRV_REQ_MAY_UNMAP;
3371 if (ret < 0) {
3372 /* Do nothing, write notifier decided to fail this request */
3373 } else if (flags & BDRV_REQ_ZERO_WRITE) {
3374 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
3375 ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
3376 } else {
3377 BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
3378 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
3380 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
3382 if (ret == 0 && !bs->enable_write_cache) {
3383 ret = bdrv_co_flush(bs);
3386 bdrv_set_dirty(bs, sector_num, nb_sectors);
3388 block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
3390 if (bs->growable && ret >= 0) {
3391 bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
3394 return ret;
3398 * Handle a write request in coroutine context
3400 static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
3401 int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
3402 BdrvRequestFlags flags)
3404 BdrvTrackedRequest req;
3405 /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
3406 uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
3407 uint8_t *head_buf = NULL;
3408 uint8_t *tail_buf = NULL;
3409 QEMUIOVector local_qiov;
3410 bool use_local_qiov = false;
3411 int ret;
3413 if (!bs->drv) {
3414 return -ENOMEDIUM;
3416 if (bs->read_only) {
3417 return -EACCES;
3419 if (bdrv_check_byte_request(bs, offset, bytes)) {
3420 return -EIO;
3423 /* throttling disk I/O */
3424 if (bs->io_limits_enabled) {
3425 bdrv_io_limits_intercept(bs, bytes, true);
3429 * Align write if necessary by performing a read-modify-write cycle.
3430 * Pad qiov with the read parts and be sure to have a tracked request not
3431 * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
3433 tracked_request_begin(&req, bs, offset, bytes, true);
3435 if (offset & (align - 1)) {
3436 QEMUIOVector head_qiov;
3437 struct iovec head_iov;
3439 mark_request_serialising(&req, align);
3440 wait_serialising_requests(&req);
3442 head_buf = qemu_blockalign(bs, align);
3443 head_iov = (struct iovec) {
3444 .iov_base = head_buf,
3445 .iov_len = align,
3447 qemu_iovec_init_external(&head_qiov, &head_iov, 1);
3449 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
3450 ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
3451 align, &head_qiov, 0);
3452 if (ret < 0) {
3453 goto fail;
3455 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
3457 qemu_iovec_init(&local_qiov, qiov->niov + 2);
3458 qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
3459 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3460 use_local_qiov = true;
3462 bytes += offset & (align - 1);
3463 offset = offset & ~(align - 1);
3466 if ((offset + bytes) & (align - 1)) {
3467 QEMUIOVector tail_qiov;
3468 struct iovec tail_iov;
3469 size_t tail_bytes;
3470 bool waited;
3472 mark_request_serialising(&req, align);
3473 waited = wait_serialising_requests(&req);
3474 assert(!waited || !use_local_qiov);
3476 tail_buf = qemu_blockalign(bs, align);
3477 tail_iov = (struct iovec) {
3478 .iov_base = tail_buf,
3479 .iov_len = align,
3481 qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
3483 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
3484 ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
3485 align, &tail_qiov, 0);
3486 if (ret < 0) {
3487 goto fail;
3489 BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
3491 if (!use_local_qiov) {
3492 qemu_iovec_init(&local_qiov, qiov->niov + 1);
3493 qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
3494 use_local_qiov = true;
3497 tail_bytes = (offset + bytes) & (align - 1);
3498 qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
3500 bytes = ROUND_UP(bytes, align);
3503 ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
3504 use_local_qiov ? &local_qiov : qiov,
3505 flags);
3507 fail:
3508 tracked_request_end(&req);
3510 if (use_local_qiov) {
3511 qemu_iovec_destroy(&local_qiov);
3513 qemu_vfree(head_buf);
3514 qemu_vfree(tail_buf);
3516 return ret;
3519 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
3520 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
3521 BdrvRequestFlags flags)
3523 if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
3524 return -EINVAL;
3527 return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
3528 nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
3531 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
3532 int nb_sectors, QEMUIOVector *qiov)
3534 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
3536 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
3539 int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
3540 int64_t sector_num, int nb_sectors,
3541 BdrvRequestFlags flags)
3543 trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
3545 if (!(bs->open_flags & BDRV_O_UNMAP)) {
3546 flags &= ~BDRV_REQ_MAY_UNMAP;
3549 return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
3550 BDRV_REQ_ZERO_WRITE | flags);
3554 * Truncate file to 'offset' bytes (needed only for file protocols)
3556 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
3558 BlockDriver *drv = bs->drv;
3559 int ret;
3560 if (!drv)
3561 return -ENOMEDIUM;
3562 if (!drv->bdrv_truncate)
3563 return -ENOTSUP;
3564 if (bs->read_only)
3565 return -EACCES;
3567 ret = drv->bdrv_truncate(bs, offset);
3568 if (ret == 0) {
3569 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
3570 bdrv_dev_resize_cb(bs);
3572 return ret;
3576 * Length of an allocated file in bytes. Sparse files are counted by actual
3577 * allocated space. Return < 0 on error or if unknown.
3579 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
3581 BlockDriver *drv = bs->drv;
3582 if (!drv) {
3583 return -ENOMEDIUM;
3585 if (drv->bdrv_get_allocated_file_size) {
3586 return drv->bdrv_get_allocated_file_size(bs);
3588 if (bs->file) {
3589 return bdrv_get_allocated_file_size(bs->file);
3591 return -ENOTSUP;
3595 * Return number of sectors on success, -errno on error.
3597 int64_t bdrv_nb_sectors(BlockDriverState *bs)
3599 BlockDriver *drv = bs->drv;
3601 if (!drv)
3602 return -ENOMEDIUM;
3604 if (drv->has_variable_length) {
3605 int ret = refresh_total_sectors(bs, bs->total_sectors);
3606 if (ret < 0) {
3607 return ret;
3610 return bs->total_sectors;
3614 * Return length in bytes on success, -errno on error.
3615 * The length is always a multiple of BDRV_SECTOR_SIZE.
3617 int64_t bdrv_getlength(BlockDriverState *bs)
3619 int64_t ret = bdrv_nb_sectors(bs);
3621 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
3624 /* return 0 as number of sectors if no device present or error */
3625 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
3627 int64_t nb_sectors = bdrv_nb_sectors(bs);
3629 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
3632 void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
3633 BlockdevOnError on_write_error)
3635 bs->on_read_error = on_read_error;
3636 bs->on_write_error = on_write_error;
3639 BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
3641 return is_read ? bs->on_read_error : bs->on_write_error;
3644 BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
3646 BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
3648 switch (on_err) {
3649 case BLOCKDEV_ON_ERROR_ENOSPC:
3650 return (error == ENOSPC) ?
3651 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
3652 case BLOCKDEV_ON_ERROR_STOP:
3653 return BLOCK_ERROR_ACTION_STOP;
3654 case BLOCKDEV_ON_ERROR_REPORT:
3655 return BLOCK_ERROR_ACTION_REPORT;
3656 case BLOCKDEV_ON_ERROR_IGNORE:
3657 return BLOCK_ERROR_ACTION_IGNORE;
3658 default:
3659 abort();
3663 static void send_qmp_error_event(BlockDriverState *bs,
3664 BlockErrorAction action,
3665 bool is_read, int error)
3667 BlockErrorAction ac;
3669 ac = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
3670 qapi_event_send_block_io_error(bdrv_get_device_name(bs), ac, action,
3671 bdrv_iostatus_is_enabled(bs),
3672 error == ENOSPC, strerror(error),
3673 &error_abort);
3676 /* This is done by device models because, while the block layer knows
3677 * about the error, it does not know whether an operation comes from
3678 * the device or the block layer (from a job, for example).
3680 void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
3681 bool is_read, int error)
3683 assert(error >= 0);
3685 if (action == BLOCK_ERROR_ACTION_STOP) {
3686 /* First set the iostatus, so that "info block" returns an iostatus
3687 * that matches the events raised so far (an additional error iostatus
3688 * is fine, but not a lost one).
3690 bdrv_iostatus_set_err(bs, error);
3692 /* Then raise the request to stop the VM and the event.
3693 * qemu_system_vmstop_request_prepare has two effects. First,
3694 * it ensures that the STOP event always comes after the
3695 * BLOCK_IO_ERROR event. Second, it ensures that even if management
3696 * can observe the STOP event and do a "cont" before the STOP
3697 * event is issued, the VM will not stop. In this case, vm_start()
3698 * also ensures that the STOP/RESUME pair of events is emitted.
3700 qemu_system_vmstop_request_prepare();
3701 send_qmp_error_event(bs, action, is_read, error);
3702 qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
3703 } else {
3704 send_qmp_error_event(bs, action, is_read, error);
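/*
 * Usage sketch (illustrative): the calling convention for device models,
 * given a failed request with error code 'error' (a positive errno):
 *
 *     BlockErrorAction action = bdrv_get_error_action(bs, is_read, error);
 *     bdrv_error_action(bs, action, is_read, error);
 *     if (action == BLOCK_ERROR_ACTION_STOP) {
 *         // queue the request for retry after the VM is resumed
 *     }
 */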
3708 int bdrv_is_read_only(BlockDriverState *bs)
3710 return bs->read_only;
3713 int bdrv_is_sg(BlockDriverState *bs)
3715 return bs->sg;
3718 int bdrv_enable_write_cache(BlockDriverState *bs)
3720 return bs->enable_write_cache;
3723 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
3725 bs->enable_write_cache = wce;
3727 /* so a reopen() will preserve wce */
3728 if (wce) {
3729 bs->open_flags |= BDRV_O_CACHE_WB;
3730 } else {
3731 bs->open_flags &= ~BDRV_O_CACHE_WB;
3735 int bdrv_is_encrypted(BlockDriverState *bs)
3737 if (bs->backing_hd && bs->backing_hd->encrypted)
3738 return 1;
3739 return bs->encrypted;
3742 int bdrv_key_required(BlockDriverState *bs)
3744 BlockDriverState *backing_hd = bs->backing_hd;
3746 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
3747 return 1;
3748 return (bs->encrypted && !bs->valid_key);
3751 int bdrv_set_key(BlockDriverState *bs, const char *key)
3753 int ret;
3754 if (bs->backing_hd && bs->backing_hd->encrypted) {
3755 ret = bdrv_set_key(bs->backing_hd, key);
3756 if (ret < 0)
3757 return ret;
3758 if (!bs->encrypted)
3759 return 0;
3761 if (!bs->encrypted) {
3762 return -EINVAL;
3763 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
3764 return -ENOMEDIUM;
3766 ret = bs->drv->bdrv_set_key(bs, key);
3767 if (ret < 0) {
3768 bs->valid_key = 0;
3769 } else if (!bs->valid_key) {
3770 bs->valid_key = 1;
3771 /* call the change callback now, we skipped it on open */
3772 bdrv_dev_change_media_cb(bs, true);
3774 return ret;
3777 const char *bdrv_get_format_name(BlockDriverState *bs)
3779 return bs->drv ? bs->drv->format_name : NULL;
3782 static int qsort_strcmp(const void *a, const void *b)
3784 return strcmp(a, b);
3787 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
3788 void *opaque)
3790 BlockDriver *drv;
3791 int count = 0;
3792 int i;
3793 const char **formats = NULL;
3795 QLIST_FOREACH(drv, &bdrv_drivers, list) {
3796 if (drv->format_name) {
3797 bool found = false;
3798 int i = count;
3799 while (formats && i && !found) {
3800 found = !strcmp(formats[--i], drv->format_name);
3803 if (!found) {
3804 formats = g_renew(const char *, formats, count + 1);
3805 formats[count++] = drv->format_name;
3810 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
3812 for (i = 0; i < count; i++) {
3813 it(opaque, formats[i]);
3816 g_free(formats);
3819 /* Find a BlockDriverState by the name of its attached block backend */
3820 BlockDriverState *bdrv_find(const char *name)
3822 BlockDriverState *bs;
3824 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3825 if (!strcmp(name, bdrv_get_device_name(bs))) {
3826 return bs;
3829 return NULL;
3832 /* Find a node in the graph of named BlockDriverStates by node name */
3833 BlockDriverState *bdrv_find_node(const char *node_name)
3835 BlockDriverState *bs;
3837 assert(node_name);
3839 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3840 if (!strcmp(node_name, bs->node_name)) {
3841 return bs;
3844 return NULL;
3847 /* Put this QMP function here so it can access the static graph_bdrv_states. */
3848 BlockDeviceInfoList *bdrv_named_nodes_list(void)
3850 BlockDeviceInfoList *list, *entry;
3851 BlockDriverState *bs;
3853 list = NULL;
3854 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
3855 entry = g_malloc0(sizeof(*entry));
3856 entry->value = bdrv_block_device_info(bs);
3857 entry->next = list;
3858 list = entry;
3861 return list;
3864 BlockDriverState *bdrv_lookup_bs(const char *device,
3865 const char *node_name,
3866 Error **errp)
3868 BlockDriverState *bs = NULL;
3870 if (device) {
3871 bs = bdrv_find(device);
3873 if (bs) {
3874 return bs;
3878 if (node_name) {
3879 bs = bdrv_find_node(node_name);
3881 if (bs) {
3882 return bs;
3886 error_setg(errp, "Cannot find device=%s nor node_name=%s",
3887 device ? device : "",
3888 node_name ? node_name : "");
3889 return NULL;
3892 /* If 'base' is in the same chain as 'top', return true. Otherwise,
3893 * return false. If either argument is NULL, return false. */
3894 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
3896 while (top && top != base) {
3897 top = top->backing_hd;
3900 return top != NULL;
3903 BlockDriverState *bdrv_next(BlockDriverState *bs)
3905 if (!bs) {
3906 return QTAILQ_FIRST(&bdrv_states);
3908 return QTAILQ_NEXT(bs, device_list);
3911 const char *bdrv_get_device_name(const BlockDriverState *bs)
3913 return bs->blk ? blk_name(bs->blk) : "";
3916 int bdrv_get_flags(BlockDriverState *bs)
3918 return bs->open_flags;
3921 int bdrv_flush_all(void)
3923 BlockDriverState *bs;
3924 int result = 0;
3926 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3927 AioContext *aio_context = bdrv_get_aio_context(bs);
3928 int ret;
3930 aio_context_acquire(aio_context);
3931 ret = bdrv_flush(bs);
3932 if (ret < 0 && !result) {
3933 result = ret;
3935 aio_context_release(aio_context);
3938 return result;
3941 int bdrv_has_zero_init_1(BlockDriverState *bs)
3943 return 1;
3946 int bdrv_has_zero_init(BlockDriverState *bs)
3948 assert(bs->drv);
3950 /* If BS is a copy on write image, it is initialized to
3951 the contents of the base image, which may not be zeroes. */
3952 if (bs->backing_hd) {
3953 return 0;
3955 if (bs->drv->bdrv_has_zero_init) {
3956 return bs->drv->bdrv_has_zero_init(bs);
3959 /* safe default */
3960 return 0;
3963 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3965 BlockDriverInfo bdi;
3967 if (bs->backing_hd) {
3968 return false;
3971 if (bdrv_get_info(bs, &bdi) == 0) {
3972 return bdi.unallocated_blocks_are_zero;
3975 return false;
3978 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3980 BlockDriverInfo bdi;
3982 if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
3983 return false;
3986 if (bdrv_get_info(bs, &bdi) == 0) {
3987 return bdi.can_write_zeroes_with_unmap;
3990 return false;
3993 typedef struct BdrvCoGetBlockStatusData {
3994 BlockDriverState *bs;
3995 BlockDriverState *base;
3996 int64_t sector_num;
3997 int nb_sectors;
3998 int *pnum;
3999 int64_t ret;
4000 bool done;
4001 } BdrvCoGetBlockStatusData;
4004 * Returns the BDRV_BLOCK_* status of the specified sectors. Drivers not
4005 * implementing the functionality are assumed to not support backing files;
4006 * hence all their sectors are reported as allocated.
4008 * If 'sector_num' is beyond the end of the disk image the return value is 0
4009 * and 'pnum' is set to 0.
4011 * 'pnum' is set to the number of sectors (including and immediately following
4012 * the specified sector) that are known to be in the same
4013 * allocated/unallocated state.
4015 * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
4016 * beyond the end of the disk image it will be clamped.
4018 static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
4019 int64_t sector_num,
4020 int nb_sectors, int *pnum)
4022 int64_t total_sectors;
4023 int64_t n;
4024 int64_t ret, ret2;
4026 total_sectors = bdrv_nb_sectors(bs);
4027 if (total_sectors < 0) {
4028 return total_sectors;
4031 if (sector_num >= total_sectors) {
4032 *pnum = 0;
4033 return 0;
4036 n = total_sectors - sector_num;
4037 if (n < nb_sectors) {
4038 nb_sectors = n;
4041 if (!bs->drv->bdrv_co_get_block_status) {
4042 *pnum = nb_sectors;
4043 ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
4044 if (bs->drv->protocol_name) {
4045 ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
4047 return ret;
4050 ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
4051 if (ret < 0) {
4052 *pnum = 0;
4053 return ret;
4056 if (ret & BDRV_BLOCK_RAW) {
4057 assert(ret & BDRV_BLOCK_OFFSET_VALID);
4058 return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4059 *pnum, pnum);
4062 if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
4063 ret |= BDRV_BLOCK_ALLOCATED;
4066 if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
4067 if (bdrv_unallocated_blocks_are_zero(bs)) {
4068 ret |= BDRV_BLOCK_ZERO;
4069 } else if (bs->backing_hd) {
4070 BlockDriverState *bs2 = bs->backing_hd;
4071 int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
4072 if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
4073 ret |= BDRV_BLOCK_ZERO;
4078 if (bs->file &&
4079 (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
4080 (ret & BDRV_BLOCK_OFFSET_VALID)) {
4081 ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
4082 *pnum, pnum);
4083 if (ret2 >= 0) {
4084 /* Ignore errors. This is just providing extra information; it
4085 * is useful but not necessary.
4087 ret |= (ret2 & BDRV_BLOCK_ZERO);
4091 return ret;
4094 /* Coroutine wrapper for bdrv_get_block_status() */
4095 static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
4097 BdrvCoGetBlockStatusData *data = opaque;
4098 BlockDriverState *bs = data->bs;
4100 data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
4101 data->pnum);
4102 data->done = true;
4106 * Synchronous wrapper around bdrv_co_get_block_status().
4108 * See bdrv_co_get_block_status() for details.
4110 int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
4111 int nb_sectors, int *pnum)
4113 Coroutine *co;
4114 BdrvCoGetBlockStatusData data = {
4115 .bs = bs,
4116 .sector_num = sector_num,
4117 .nb_sectors = nb_sectors,
4118 .pnum = pnum,
4119 .done = false,
4122 if (qemu_in_coroutine()) {
4123 /* Fast-path if already in coroutine context */
4124 bdrv_get_block_status_co_entry(&data);
4125 } else {
4126 AioContext *aio_context = bdrv_get_aio_context(bs);
4128 co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
4129 qemu_coroutine_enter(co, &data);
4130 while (!data.done) {
4131 aio_poll(aio_context, true);
4134 return data.ret;
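/*
 * Usage sketch (illustrative): scan an image and classify each extent by
 * its status flags; 'total_sectors' is assumed to come from
 * bdrv_nb_sectors(bs):
 *
 *     int64_t sector = 0;
 *     while (sector < total_sectors) {
 *         int n;
 *         int64_t ret = bdrv_get_block_status(bs, sector,
 *                                             MIN(total_sectors - sector,
 *                                                 INT_MAX), &n);
 *         if (ret < 0) {
 *             break;
 *         }
 *         if (ret & BDRV_BLOCK_ZERO) {
 *             // [sector, sector + n) reads as zeroes
 *         } else if (ret & BDRV_BLOCK_ALLOCATED) {
 *             // data is present at this layer of the chain
 *         }
 *         sector += n;
 *     }
 */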
4137 int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
4138 int nb_sectors, int *pnum)
4140 int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
4141 if (ret < 0) {
4142 return ret;
4144 return !!(ret & BDRV_BLOCK_ALLOCATED);
4148 * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
4150 * Return true if the given sector is allocated in any image between
4151 * BASE and TOP (inclusive). BASE can be NULL to check if the given
4152 * sector is allocated in any image of the chain. Return false otherwise.
4154 * 'pnum' is set to the number of sectors (including and immediately following
4155 * the specified sector) that are known to be in the same
4156 * allocated/unallocated state.
4159 int bdrv_is_allocated_above(BlockDriverState *top,
4160 BlockDriverState *base,
4161 int64_t sector_num,
4162 int nb_sectors, int *pnum)
4164 BlockDriverState *intermediate;
4165 int ret, n = nb_sectors;
4167 intermediate = top;
4168 while (intermediate && intermediate != base) {
4169 int pnum_inter;
4170 ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
4171 &pnum_inter);
4172 if (ret < 0) {
4173 return ret;
4174 } else if (ret) {
4175 *pnum = pnum_inter;
4176 return 1;
4180 * [sector_num, sector_num + nb_sectors) is unallocated in top, but an
4181 * intermediate image might have
4183 * [sector_num + x, sector_num + nb_sectors) allocated.
4185 if (n > pnum_inter &&
4186 (intermediate == top ||
4187 sector_num + pnum_inter < intermediate->total_sectors)) {
4188 n = pnum_inter;
4191 intermediate = intermediate->backing_hd;
4194 *pnum = n;
4195 return 0;
4198 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
4200 if (bs->backing_hd && bs->backing_hd->encrypted)
4201 return bs->backing_file;
4202 else if (bs->encrypted)
4203 return bs->filename;
4204 else
4205 return NULL;
4208 void bdrv_get_backing_filename(BlockDriverState *bs,
4209 char *filename, int filename_size)
4211 pstrcpy(filename, filename_size, bs->backing_file);
4214 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
4215 const uint8_t *buf, int nb_sectors)
4217 BlockDriver *drv = bs->drv;
4218 if (!drv)
4219 return -ENOMEDIUM;
4220 if (!drv->bdrv_write_compressed)
4221 return -ENOTSUP;
4222 if (bdrv_check_request(bs, sector_num, nb_sectors))
4223 return -EIO;
4225 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
4227 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
4230 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
4232 BlockDriver *drv = bs->drv;
4233 if (!drv)
4234 return -ENOMEDIUM;
4235 if (!drv->bdrv_get_info)
4236 return -ENOTSUP;
4237 memset(bdi, 0, sizeof(*bdi));
4238 return drv->bdrv_get_info(bs, bdi);
4241 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
4243 BlockDriver *drv = bs->drv;
4244 if (drv && drv->bdrv_get_specific_info) {
4245 return drv->bdrv_get_specific_info(bs);
4247 return NULL;
4250 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
4251 int64_t pos, int size)
4253 QEMUIOVector qiov;
4254 struct iovec iov = {
4255 .iov_base = (void *) buf,
4256 .iov_len = size,
4259 qemu_iovec_init_external(&qiov, &iov, 1);
4260 return bdrv_writev_vmstate(bs, &qiov, pos);
4263 int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
4265 BlockDriver *drv = bs->drv;
4267 if (!drv) {
4268 return -ENOMEDIUM;
4269 } else if (drv->bdrv_save_vmstate) {
4270 return drv->bdrv_save_vmstate(bs, qiov, pos);
4271 } else if (bs->file) {
4272 return bdrv_writev_vmstate(bs->file, qiov, pos);
4275 return -ENOTSUP;
4278 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
4279 int64_t pos, int size)
4281 BlockDriver *drv = bs->drv;
4282 if (!drv)
4283 return -ENOMEDIUM;
4284 if (drv->bdrv_load_vmstate)
4285 return drv->bdrv_load_vmstate(bs, buf, pos, size);
4286 if (bs->file)
4287 return bdrv_load_vmstate(bs->file, buf, pos, size);
4288 return -ENOTSUP;
4291 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
4293 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
4294 return;
4297 bs->drv->bdrv_debug_event(bs, event);
4300 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
4301 const char *tag)
4303 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
4304 bs = bs->file;
4307 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
4308 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
4311 return -ENOTSUP;
4314 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
4316 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
4317 bs = bs->file;
4320 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
4321 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
4324 return -ENOTSUP;
4327 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
4329 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
4330 bs = bs->file;
4333 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
4334 return bs->drv->bdrv_debug_resume(bs, tag);
4337 return -ENOTSUP;
4340 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
4342 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
4343 bs = bs->file;
4346 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
4347 return bs->drv->bdrv_debug_is_suspended(bs, tag);
4350 return false;
4353 int bdrv_is_snapshot(BlockDriverState *bs)
4355 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
4358 /* backing_file can be relative, absolute, or a protocol. If it is
4359 * relative, it must be relative to the chain. So, passing in bs->filename
4360 * from a BDS as backing_file should not be done, as that may be relative to
4361 * the CWD rather than the chain. */
4362 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
4363 const char *backing_file)
4365 char *filename_full = NULL;
4366 char *backing_file_full = NULL;
4367 char *filename_tmp = NULL;
4368 int is_protocol = 0;
4369 BlockDriverState *curr_bs = NULL;
4370 BlockDriverState *retval = NULL;
4372 if (!bs || !bs->drv || !backing_file) {
4373 return NULL;
4376 filename_full = g_malloc(PATH_MAX);
4377 backing_file_full = g_malloc(PATH_MAX);
4378 filename_tmp = g_malloc(PATH_MAX);
4380 is_protocol = path_has_protocol(backing_file);
4382 for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
4384 /* If either of the filename paths is actually a protocol, then
4385 * compare unmodified paths; otherwise make paths relative */
4386 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
4387 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
4388 retval = curr_bs->backing_hd;
4389 break;
4391 } else {
4392 /* If not an absolute filename path, make it relative to the current
4393 * image's filename path */
4394 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4395 backing_file);
4397 /* We are going to compare absolute pathnames */
4398 if (!realpath(filename_tmp, filename_full)) {
4399 continue;
4402 /* We need to make sure the backing filename we are comparing against
4403 * is relative to the current image filename (or absolute) */
4404 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
4405 curr_bs->backing_file);
4407 if (!realpath(filename_tmp, backing_file_full)) {
4408 continue;
4411 if (strcmp(backing_file_full, filename_full) == 0) {
4412 retval = curr_bs->backing_hd;
4413 break;
4418 g_free(filename_full);
4419 g_free(backing_file_full);
4420 g_free(filename_tmp);
4421 return retval;
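
/* Illustrative example (hypothetical chain base <- snap1.qcow2 <- top):
 * looking up "snap1.qcow2" works regardless of the process's CWD, because
 * the relative name is combined with each image's own filename before the
 * realpath() comparison above. */
static BlockDriverState *example_find_snap1(BlockDriverState *top)
{
    return bdrv_find_backing_image(top, "snap1.qcow2");
}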
4424 int bdrv_get_backing_file_depth(BlockDriverState *bs)
4426 if (!bs->drv) {
4427 return 0;
4430 if (!bs->backing_hd) {
4431 return 0;
4434 return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
4437 /**************************************************************/
4438 /* async I/Os */
4440 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
4441 QEMUIOVector *qiov, int nb_sectors,
4442 BlockDriverCompletionFunc *cb, void *opaque)
4444 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
4446 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
4447 cb, opaque, false);
4450 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
4451 QEMUIOVector *qiov, int nb_sectors,
4452 BlockDriverCompletionFunc *cb, void *opaque)
4454 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
4456 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
4457 cb, opaque, true);
4460 BlockDriverAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
4461 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
4462 BlockDriverCompletionFunc *cb, void *opaque)
4464 trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
4466 return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
4467 BDRV_REQ_ZERO_WRITE | flags,
4468 cb, opaque, true);
4472 typedef struct MultiwriteCB {
4473 int error;
4474 int num_requests;
4475 int num_callbacks;
4476 struct {
4477 BlockDriverCompletionFunc *cb;
4478 void *opaque;
4479 QEMUIOVector *free_qiov;
4480 } callbacks[];
4481 } MultiwriteCB;
4483 static void multiwrite_user_cb(MultiwriteCB *mcb)
4485 int i;
4487 for (i = 0; i < mcb->num_callbacks; i++) {
4488 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
4489 if (mcb->callbacks[i].free_qiov) {
4490 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
4492 g_free(mcb->callbacks[i].free_qiov);
4496 static void multiwrite_cb(void *opaque, int ret)
4498 MultiwriteCB *mcb = opaque;
4500 trace_multiwrite_cb(mcb, ret);
4502 if (ret < 0 && !mcb->error) {
4503 mcb->error = ret;
4506 mcb->num_requests--;
4507 if (mcb->num_requests == 0) {
4508 multiwrite_user_cb(mcb);
4509 g_free(mcb);
4513 static int multiwrite_req_compare(const void *a, const void *b)
4515 const BlockRequest *req1 = a, *req2 = b;
4518 * Note that we can't simply subtract req2->sector from req1->sector
4519  * here, as the 64-bit difference could overflow the int return value.
4521 if (req1->sector > req2->sector) {
4522 return 1;
4523 } else if (req1->sector < req2->sector) {
4524 return -1;
4525 } else {
4526 return 0;
4531 * Takes a bunch of requests and tries to merge them. Returns the number of
4532 * requests that remain after merging.
4534 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
4535 int num_reqs, MultiwriteCB *mcb)
4537 int i, outidx;
4539 // Sort requests by start sector
4540 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
4542     // Check if adjacent requests are exactly sequential or overlapping.
4543     // If so, combine them into a single request.
4544 outidx = 0;
4545 for (i = 1; i < num_reqs; i++) {
4546 int merge = 0;
4547 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
4549 // Handle exactly sequential writes and overlapping writes.
4550 if (reqs[i].sector <= oldreq_last) {
4551 merge = 1;
4554 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
4555 merge = 0;
4558 if (merge) {
4559 size_t size;
4560 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
4561 qemu_iovec_init(qiov,
4562 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
4564 // Add the first request to the merged one. If the requests are
4565 // overlapping, drop the last sectors of the first request.
4566 size = (reqs[i].sector - reqs[outidx].sector) << 9;
4567 qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
4569             // We shouldn't need to add any zeros between the two requests
4570             assert(reqs[i].sector <= oldreq_last);
4572 // Add the second request
4573 qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
4575 // Add tail of first request, if necessary
4576 if (qiov->size < reqs[outidx].qiov->size) {
4577 qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
4578 reqs[outidx].qiov->size - qiov->size);
4581 reqs[outidx].nb_sectors = qiov->size >> 9;
4582 reqs[outidx].qiov = qiov;
4584 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
4585 } else {
4586 outidx++;
4587 reqs[outidx].sector = reqs[i].sector;
4588 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
4589 reqs[outidx].qiov = reqs[i].qiov;
4593 return outidx + 1;
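
/* Illustrative example: given three sorted requests covering sectors
 * [0,8), [8,16) and [32,40), the first two are exactly sequential
 * (reqs[1].sector == oldreq_last == 8) and are merged into a single
 * 16-sector request, while the third is left alone, so multiwrite_merge()
 * returns 2 remaining requests. */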
4597 * Submit multiple AIO write requests at once.
4599 * On success, the function returns 0 and all requests in the reqs array have
4600  * been submitted. On error, this function returns -1, and any of the
4601  * requests may or may not have been submitted yet. In particular, this
4602  * means that the callback will be called for some of the requests but not
4603  * for others. The caller must check the error field of each BlockRequest
4604  * to know which callbacks to wait for (if error != 0, no callback will be
4604  * called).
4606 * The implementation may modify the contents of the reqs array, e.g. to merge
4607 * requests. However, the fields opaque and error are left unmodified as they
4608 * are used to signal failure for a single request to the caller.
4610 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
4612 MultiwriteCB *mcb;
4613 int i;
4615 /* don't submit writes if we don't have a medium */
4616 if (bs->drv == NULL) {
4617 for (i = 0; i < num_reqs; i++) {
4618 reqs[i].error = -ENOMEDIUM;
4620 return -1;
4623 if (num_reqs == 0) {
4624 return 0;
4627 // Create MultiwriteCB structure
4628 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
4629 mcb->num_requests = 0;
4630 mcb->num_callbacks = num_reqs;
4632 for (i = 0; i < num_reqs; i++) {
4633 mcb->callbacks[i].cb = reqs[i].cb;
4634 mcb->callbacks[i].opaque = reqs[i].opaque;
4637     // Check for mergeable requests
4638 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
4640 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
4642 /* Run the aio requests. */
4643 mcb->num_requests = num_reqs;
4644 for (i = 0; i < num_reqs; i++) {
4645 bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
4646 reqs[i].nb_sectors, reqs[i].flags,
4647 multiwrite_cb, mcb,
4648 true);
4651 return 0;
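
/* Illustrative usage sketch (hypothetical helper, not called anywhere in
 * this file): batching two writes through bdrv_aio_multiwrite(). Only the
 * BlockRequest fields used above (sector, nb_sectors, qiov, cb, opaque,
 * error) are assumed, and qiov0 is assumed not to reach into qiov1's
 * sector range. */
static void example_multiwrite_cb(void *opaque, int ret)
{
    /* ret < 0 signals failure of the request this callback belongs to */
}

static int example_submit_two_writes(BlockDriverState *bs,
                                     QEMUIOVector *qiov0, QEMUIOVector *qiov1)
{
    BlockRequest reqs[2] = {
        { .sector = 0, .nb_sectors = qiov0->size >> BDRV_SECTOR_BITS,
          .qiov = qiov0, .cb = example_multiwrite_cb },
        { .sector = 16, .nb_sectors = qiov1->size >> BDRV_SECTOR_BITS,
          .qiov = qiov1, .cb = example_multiwrite_cb },
    };

    /* On -1, requests with reqs[i].error != 0 will not get their callback */
    return bdrv_aio_multiwrite(bs, reqs, 2);
}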
4654 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
4656 qemu_aio_ref(acb);
4657 bdrv_aio_cancel_async(acb);
4658 while (acb->refcnt > 1) {
4659 if (acb->aiocb_info->get_aio_context) {
4660 aio_poll(acb->aiocb_info->get_aio_context(acb), true);
4661 } else if (acb->bs) {
4662 aio_poll(bdrv_get_aio_context(acb->bs), true);
4663 } else {
4664 abort();
4667 qemu_aio_unref(acb);
4670 /* Async version of aio cancel. The caller is not blocked if the acb implements
4671  * cancel_async; otherwise we do nothing and let the request complete normally.
4672 * In either case the completion callback must be called. */
4673 void bdrv_aio_cancel_async(BlockDriverAIOCB *acb)
4675 if (acb->aiocb_info->cancel_async) {
4676 acb->aiocb_info->cancel_async(acb);
4680 /**************************************************************/
4681 /* async block device emulation */
4683 typedef struct BlockDriverAIOCBSync {
4684 BlockDriverAIOCB common;
4685 QEMUBH *bh;
4686 int ret;
4687 /* vector translation state */
4688 QEMUIOVector *qiov;
4689 uint8_t *bounce;
4690 int is_write;
4691 } BlockDriverAIOCBSync;
4693 static const AIOCBInfo bdrv_em_aiocb_info = {
4694 .aiocb_size = sizeof(BlockDriverAIOCBSync),
4697 static void bdrv_aio_bh_cb(void *opaque)
4699 BlockDriverAIOCBSync *acb = opaque;
4701 if (!acb->is_write && acb->ret >= 0) {
4702 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
4704 qemu_vfree(acb->bounce);
4705 acb->common.cb(acb->common.opaque, acb->ret);
4706 qemu_bh_delete(acb->bh);
4707 acb->bh = NULL;
4708 qemu_aio_unref(acb);
4711 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
4712 int64_t sector_num,
4713 QEMUIOVector *qiov,
4714 int nb_sectors,
4715 BlockDriverCompletionFunc *cb,
4716 void *opaque,
4717 int is_write)
4720 BlockDriverAIOCBSync *acb;
4722 acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
4723 acb->is_write = is_write;
4724 acb->qiov = qiov;
4725 acb->bounce = qemu_try_blockalign(bs, qiov->size);
4726 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
4728 if (acb->bounce == NULL) {
4729 acb->ret = -ENOMEM;
4730 } else if (is_write) {
4731 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
4732 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
4733 } else {
4734 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
4737 qemu_bh_schedule(acb->bh);
4739 return &acb->common;
4742 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
4743 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4744 BlockDriverCompletionFunc *cb, void *opaque)
4746 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
4749 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
4750 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
4751 BlockDriverCompletionFunc *cb, void *opaque)
4753 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
4757 typedef struct BlockDriverAIOCBCoroutine {
4758 BlockDriverAIOCB common;
4759 BlockRequest req;
4760 bool is_write;
4761 bool *done;
4762 QEMUBH* bh;
4763 } BlockDriverAIOCBCoroutine;
4765 static const AIOCBInfo bdrv_em_co_aiocb_info = {
4766 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
4769 static void bdrv_co_em_bh(void *opaque)
4771 BlockDriverAIOCBCoroutine *acb = opaque;
4773 acb->common.cb(acb->common.opaque, acb->req.error);
4775 qemu_bh_delete(acb->bh);
4776 qemu_aio_unref(acb);
4779 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
4780 static void coroutine_fn bdrv_co_do_rw(void *opaque)
4782 BlockDriverAIOCBCoroutine *acb = opaque;
4783 BlockDriverState *bs = acb->common.bs;
4785 if (!acb->is_write) {
4786 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
4787 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4788 } else {
4789 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
4790 acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
4793 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
4794 qemu_bh_schedule(acb->bh);
4797 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
4798 int64_t sector_num,
4799 QEMUIOVector *qiov,
4800 int nb_sectors,
4801 BdrvRequestFlags flags,
4802 BlockDriverCompletionFunc *cb,
4803 void *opaque,
4804 bool is_write)
4806 Coroutine *co;
4807 BlockDriverAIOCBCoroutine *acb;
4809 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
4810 acb->req.sector = sector_num;
4811 acb->req.nb_sectors = nb_sectors;
4812 acb->req.qiov = qiov;
4813 acb->req.flags = flags;
4814 acb->is_write = is_write;
4816 co = qemu_coroutine_create(bdrv_co_do_rw);
4817 qemu_coroutine_enter(co, acb);
4819 return &acb->common;
4822 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
4824 BlockDriverAIOCBCoroutine *acb = opaque;
4825 BlockDriverState *bs = acb->common.bs;
4827 acb->req.error = bdrv_co_flush(bs);
4828 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
4829 qemu_bh_schedule(acb->bh);
4832 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
4833 BlockDriverCompletionFunc *cb, void *opaque)
4835     Coroutine *co;
4837     BlockDriverAIOCBCoroutine *acb;
4838     trace_bdrv_aio_flush(bs, opaque);
4840 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
4842 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
4843 qemu_coroutine_enter(co, acb);
4845 return &acb->common;
4848 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
4850 BlockDriverAIOCBCoroutine *acb = opaque;
4851 BlockDriverState *bs = acb->common.bs;
4853 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
4854 acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
4855 qemu_bh_schedule(acb->bh);
4858 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
4859 int64_t sector_num, int nb_sectors,
4860 BlockDriverCompletionFunc *cb, void *opaque)
4862 Coroutine *co;
4863 BlockDriverAIOCBCoroutine *acb;
4865 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
4867 acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
4868 acb->req.sector = sector_num;
4869 acb->req.nb_sectors = nb_sectors;
4870 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
4871 qemu_coroutine_enter(co, acb);
4873 return &acb->common;
4876 void bdrv_init(void)
4878 module_call_init(MODULE_INIT_BLOCK);
4881 void bdrv_init_with_whitelist(void)
4883 use_bdrv_whitelist = 1;
4884 bdrv_init();
4887 void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
4888 BlockDriverCompletionFunc *cb, void *opaque)
4890 BlockDriverAIOCB *acb;
4892 acb = g_slice_alloc(aiocb_info->aiocb_size);
4893 acb->aiocb_info = aiocb_info;
4894 acb->bs = bs;
4895 acb->cb = cb;
4896 acb->opaque = opaque;
4897 acb->refcnt = 1;
4898 return acb;
4901 void qemu_aio_ref(void *p)
4903 BlockDriverAIOCB *acb = p;
4904 acb->refcnt++;
4907 void qemu_aio_unref(void *p)
4909 BlockDriverAIOCB *acb = p;
4910 assert(acb->refcnt > 0);
4911 if (--acb->refcnt == 0) {
4912 g_slice_free1(acb->aiocb_info->aiocb_size, acb);
4916 /**************************************************************/
4917 /* Coroutine block device emulation */
4919 typedef struct CoroutineIOCompletion {
4920 Coroutine *coroutine;
4921 int ret;
4922 } CoroutineIOCompletion;
4924 static void bdrv_co_io_em_complete(void *opaque, int ret)
4926 CoroutineIOCompletion *co = opaque;
4928 co->ret = ret;
4929 qemu_coroutine_enter(co->coroutine, NULL);
4932 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
4933 int nb_sectors, QEMUIOVector *iov,
4934 bool is_write)
4936 CoroutineIOCompletion co = {
4937 .coroutine = qemu_coroutine_self(),
4939 BlockDriverAIOCB *acb;
4941 if (is_write) {
4942 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
4943 bdrv_co_io_em_complete, &co);
4944 } else {
4945 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
4946 bdrv_co_io_em_complete, &co);
4949 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
4950 if (!acb) {
4951 return -EIO;
4953 qemu_coroutine_yield();
4955 return co.ret;
4958 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
4959 int64_t sector_num, int nb_sectors,
4960 QEMUIOVector *iov)
4962 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
4965 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
4966 int64_t sector_num, int nb_sectors,
4967 QEMUIOVector *iov)
4969 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
4972 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
4974 RwCo *rwco = opaque;
4976 rwco->ret = bdrv_co_flush(rwco->bs);
4979 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
4981 int ret;
4983 if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
4984 return 0;
4987 /* Write back cached data to the OS even with cache=unsafe */
4988 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
4989 if (bs->drv->bdrv_co_flush_to_os) {
4990 ret = bs->drv->bdrv_co_flush_to_os(bs);
4991 if (ret < 0) {
4992 return ret;
4996 /* But don't actually force it to the disk with cache=unsafe */
4997 if (bs->open_flags & BDRV_O_NO_FLUSH) {
4998 goto flush_parent;
5001 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
5002 if (bs->drv->bdrv_co_flush_to_disk) {
5003 ret = bs->drv->bdrv_co_flush_to_disk(bs);
5004 } else if (bs->drv->bdrv_aio_flush) {
5005 BlockDriverAIOCB *acb;
5006 CoroutineIOCompletion co = {
5007 .coroutine = qemu_coroutine_self(),
5010 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
5011 if (acb == NULL) {
5012 ret = -EIO;
5013 } else {
5014 qemu_coroutine_yield();
5015 ret = co.ret;
5017 } else {
5019          * Some block drivers always operate in either writethrough or unsafe
5020          * mode and therefore don't support bdrv_flush. Usually qemu doesn't
5021          * know how the server works (because the behaviour is hardcoded or
5022          * depends on server-side configuration), so we can't ensure that
5023          * everything is safe on disk. Returning an error doesn't work because
5024          * that would break guests even if the server operates in writethrough
5025          * mode.
5027          * Let's hope the user knows what they're doing.
5029 ret = 0;
5031 if (ret < 0) {
5032 return ret;
5035 /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
5036 * in the case of cache=unsafe, so there are no useless flushes.
5038 flush_parent:
5039 return bdrv_co_flush(bs->file);
5042 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
5044 Error *local_err = NULL;
5045 int ret;
5047 if (!bs->drv) {
5048 return;
5051 if (!(bs->open_flags & BDRV_O_INCOMING)) {
5052 return;
5054 bs->open_flags &= ~BDRV_O_INCOMING;
5056 if (bs->drv->bdrv_invalidate_cache) {
5057 bs->drv->bdrv_invalidate_cache(bs, &local_err);
5058 } else if (bs->file) {
5059 bdrv_invalidate_cache(bs->file, &local_err);
5061 if (local_err) {
5062 error_propagate(errp, local_err);
5063 return;
5066 ret = refresh_total_sectors(bs, bs->total_sectors);
5067 if (ret < 0) {
5068 error_setg_errno(errp, -ret, "Could not refresh total sector count");
5069 return;
5073 void bdrv_invalidate_cache_all(Error **errp)
5075 BlockDriverState *bs;
5076 Error *local_err = NULL;
5078 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5079 AioContext *aio_context = bdrv_get_aio_context(bs);
5081 aio_context_acquire(aio_context);
5082 bdrv_invalidate_cache(bs, &local_err);
5083 aio_context_release(aio_context);
5084 if (local_err) {
5085 error_propagate(errp, local_err);
5086 return;
5091 int bdrv_flush(BlockDriverState *bs)
5093 Coroutine *co;
5094 RwCo rwco = {
5095 .bs = bs,
5096 .ret = NOT_DONE,
5099 if (qemu_in_coroutine()) {
5100 /* Fast-path if already in coroutine context */
5101 bdrv_flush_co_entry(&rwco);
5102 } else {
5103 AioContext *aio_context = bdrv_get_aio_context(bs);
5105 co = qemu_coroutine_create(bdrv_flush_co_entry);
5106 qemu_coroutine_enter(co, &rwco);
5107 while (rwco.ret == NOT_DONE) {
5108 aio_poll(aio_context, true);
5112 return rwco.ret;
5115 typedef struct DiscardCo {
5116 BlockDriverState *bs;
5117 int64_t sector_num;
5118 int nb_sectors;
5119 int ret;
5120 } DiscardCo;
5121 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
5123 DiscardCo *rwco = opaque;
5125 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
5128 /* If no limit is specified in the BlockLimits, use a default
5129  * of 32768 512-byte sectors (16 MiB) per request.
5131 #define MAX_DISCARD_DEFAULT 32768
5133 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
5134 int nb_sectors)
5136 int max_discard;
5138 if (!bs->drv) {
5139 return -ENOMEDIUM;
5140 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
5141 return -EIO;
5142 } else if (bs->read_only) {
5143 return -EROFS;
5146 bdrv_reset_dirty(bs, sector_num, nb_sectors);
5148 /* Do nothing if disabled. */
5149 if (!(bs->open_flags & BDRV_O_UNMAP)) {
5150 return 0;
5153 if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
5154 return 0;
5157 max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
5158 while (nb_sectors > 0) {
5159 int ret;
5160 int num = nb_sectors;
5162 /* align request */
5163 if (bs->bl.discard_alignment &&
5164 num >= bs->bl.discard_alignment &&
5165 sector_num % bs->bl.discard_alignment) {
5166 if (num > bs->bl.discard_alignment) {
5167 num = bs->bl.discard_alignment;
5169 num -= sector_num % bs->bl.discard_alignment;
5172 /* limit request size */
5173 if (num > max_discard) {
5174 num = max_discard;
5177 if (bs->drv->bdrv_co_discard) {
5178 ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
5179 } else {
5180 BlockDriverAIOCB *acb;
5181 CoroutineIOCompletion co = {
5182 .coroutine = qemu_coroutine_self(),
5185             acb = bs->drv->bdrv_aio_discard(bs, sector_num, num,
5186 bdrv_co_io_em_complete, &co);
5187 if (acb == NULL) {
5188 return -EIO;
5189 } else {
5190 qemu_coroutine_yield();
5191 ret = co.ret;
5194 if (ret && ret != -ENOTSUP) {
5195 return ret;
5198 sector_num += num;
5199 nb_sectors -= num;
5201 return 0;
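
/* Illustrative sketch of the splitting logic above, extracted into a
 * hypothetical helper for clarity: with discard_alignment = 8,
 * sector_num = 5 and num = 100, num is first clamped to 8 and then
 * reduced by 5 % 8 = 5, so the first chunk is 3 sectors and the next
 * iteration starts on an alignment boundary at sector 8. */
static int example_discard_chunk_size(int64_t sector_num, int num,
                                      int discard_alignment, int max_discard)
{
    if (discard_alignment &&
        num >= discard_alignment &&
        sector_num % discard_alignment) {
        if (num > discard_alignment) {
            num = discard_alignment;
        }
        num -= sector_num % discard_alignment;
    }
    return MIN(num, max_discard);
}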
5204 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
5206 Coroutine *co;
5207 DiscardCo rwco = {
5208 .bs = bs,
5209 .sector_num = sector_num,
5210 .nb_sectors = nb_sectors,
5211 .ret = NOT_DONE,
5214 if (qemu_in_coroutine()) {
5215 /* Fast-path if already in coroutine context */
5216 bdrv_discard_co_entry(&rwco);
5217 } else {
5218 AioContext *aio_context = bdrv_get_aio_context(bs);
5220 co = qemu_coroutine_create(bdrv_discard_co_entry);
5221 qemu_coroutine_enter(co, &rwco);
5222 while (rwco.ret == NOT_DONE) {
5223 aio_poll(aio_context, true);
5227 return rwco.ret;
5230 /**************************************************************/
5231 /* removable device support */
5234 * Return TRUE if the media is present
5236 int bdrv_is_inserted(BlockDriverState *bs)
5238 BlockDriver *drv = bs->drv;
5240 if (!drv)
5241 return 0;
5242 if (!drv->bdrv_is_inserted)
5243 return 1;
5244 return drv->bdrv_is_inserted(bs);
5248 * Return whether the media changed since the last call to this
5249 * function, or -ENOTSUP if we don't know. Most drivers don't know.
5251 int bdrv_media_changed(BlockDriverState *bs)
5253 BlockDriver *drv = bs->drv;
5255 if (drv && drv->bdrv_media_changed) {
5256 return drv->bdrv_media_changed(bs);
5258 return -ENOTSUP;
5262 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
5264 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
5266 BlockDriver *drv = bs->drv;
5267 const char *device_name;
5269 if (drv && drv->bdrv_eject) {
5270 drv->bdrv_eject(bs, eject_flag);
5273 device_name = bdrv_get_device_name(bs);
5274 if (device_name[0] != '\0') {
5275 qapi_event_send_device_tray_moved(device_name,
5276 eject_flag, &error_abort);
5281 * Lock or unlock the media (if it is locked, the user won't be able
5282 * to eject it manually).
5284 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
5286 BlockDriver *drv = bs->drv;
5288 trace_bdrv_lock_medium(bs, locked);
5290 if (drv && drv->bdrv_lock_medium) {
5291 drv->bdrv_lock_medium(bs, locked);
5295 /* needed for the generic SCSI interface */
5297 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
5299 BlockDriver *drv = bs->drv;
5301 if (drv && drv->bdrv_ioctl)
5302 return drv->bdrv_ioctl(bs, req, buf);
5303 return -ENOTSUP;
5306 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
5307 unsigned long int req, void *buf,
5308 BlockDriverCompletionFunc *cb, void *opaque)
5310 BlockDriver *drv = bs->drv;
5312 if (drv && drv->bdrv_aio_ioctl)
5313 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
5314 return NULL;
5317 void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
5319 bs->guest_block_size = align;
5322 void *qemu_blockalign(BlockDriverState *bs, size_t size)
5324 return qemu_memalign(bdrv_opt_mem_align(bs), size);
5327 void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
5329 size_t align = bdrv_opt_mem_align(bs);
5331 /* Ensure that NULL is never returned on success */
5332 assert(align > 0);
5333 if (size == 0) {
5334 size = align;
5337 return qemu_try_memalign(align, size);
5341  * Check if all memory in this vector is aligned to the driver's optimal
5341  * memory alignment (bdrv_opt_mem_align()).
5343 bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
5345 int i;
5346 size_t alignment = bdrv_opt_mem_align(bs);
5348 for (i = 0; i < qiov->niov; i++) {
5349 if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
5350 return false;
5352 if (qiov->iov[i].iov_len % alignment) {
5353 return false;
5357 return true;
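
/* Illustrative sketch (hypothetical helper): building a vector that
 * satisfies bdrv_qiov_is_aligned() by allocating the buffer with
 * qemu_blockalign(), which honours bdrv_opt_mem_align(). len is assumed
 * to be a multiple of the alignment. */
static void example_build_aligned_qiov(BlockDriverState *bs,
                                       QEMUIOVector *qiov, size_t len)
{
    void *buf = qemu_blockalign(bs, len);

    qemu_iovec_init(qiov, 1);
    qemu_iovec_add(qiov, buf, len);
    assert(bdrv_qiov_is_aligned(bs, qiov));
}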
5360 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
5361 Error **errp)
5363 int64_t bitmap_size;
5364 BdrvDirtyBitmap *bitmap;
5366 assert((granularity & (granularity - 1)) == 0);
5368 granularity >>= BDRV_SECTOR_BITS;
5369 assert(granularity);
5370 bitmap_size = bdrv_nb_sectors(bs);
5371 if (bitmap_size < 0) {
5372 error_setg_errno(errp, -bitmap_size, "could not get length of device");
5373 errno = -bitmap_size;
5374 return NULL;
5376 bitmap = g_new0(BdrvDirtyBitmap, 1);
5377 bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
5378 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
5379 return bitmap;
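
/* Worked example of the granularity conversion above (hypothetical helper):
 * a typical 64 KiB granularity becomes 65536 >> BDRV_SECTOR_BITS = 128
 * sectors per bit, and ffs(128) - 1 = 7 is the power-of-two shift passed
 * to hbitmap_alloc(). */
static int example_granularity_to_shift(int granularity)
{
    return ffs(granularity >> BDRV_SECTOR_BITS) - 1; /* 65536 -> 7 */
}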
5382 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5384 BdrvDirtyBitmap *bm, *next;
5385 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
5386 if (bm == bitmap) {
5387 QLIST_REMOVE(bitmap, list);
5388 hbitmap_free(bitmap->bitmap);
5389 g_free(bitmap);
5390 return;
5395 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
5397 BdrvDirtyBitmap *bm;
5398 BlockDirtyInfoList *list = NULL;
5399 BlockDirtyInfoList **plist = &list;
5401 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
5402 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
5403 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
5404 info->count = bdrv_get_dirty_count(bs, bm);
5405 info->granularity =
5406 ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
5407 entry->value = info;
5408 *plist = entry;
5409 plist = &entry->next;
5412 return list;
5415 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
5417 if (bitmap) {
5418 return hbitmap_get(bitmap->bitmap, sector);
5419 } else {
5420 return 0;
5424 void bdrv_dirty_iter_init(BlockDriverState *bs,
5425 BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
5427 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
5430 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
5431 int nr_sectors)
5433 BdrvDirtyBitmap *bitmap;
5434 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5435 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
5439 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
5441 BdrvDirtyBitmap *bitmap;
5442 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
5443 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
5447 int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
5449 return hbitmap_count(bitmap->bitmap);
5452 /* Get a reference to bs */
5453 void bdrv_ref(BlockDriverState *bs)
5455 bs->refcnt++;
5458 /* Release a previously grabbed reference to bs.
5459  * If, after releasing, the reference count drops to zero, the
5460  * BlockDriverState is deleted. */
5461 void bdrv_unref(BlockDriverState *bs)
5463 if (!bs) {
5464 return;
5466 assert(bs->refcnt > 0);
5467 if (--bs->refcnt == 0) {
5468 bdrv_delete(bs);
5472 struct BdrvOpBlocker {
5473 Error *reason;
5474 QLIST_ENTRY(BdrvOpBlocker) list;
5477 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
5479 BdrvOpBlocker *blocker;
5480 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5481 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
5482 blocker = QLIST_FIRST(&bs->op_blockers[op]);
5483 if (errp) {
5484 error_setg(errp, "Device '%s' is busy: %s",
5485 bdrv_get_device_name(bs),
5486 error_get_pretty(blocker->reason));
5488 return true;
5490 return false;
5493 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
5495 BdrvOpBlocker *blocker;
5496 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5498 blocker = g_new0(BdrvOpBlocker, 1);
5499 blocker->reason = reason;
5500 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
5503 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
5505 BdrvOpBlocker *blocker, *next;
5506 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
5507 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
5508 if (blocker->reason == reason) {
5509 QLIST_REMOVE(blocker, list);
5510 g_free(blocker);
5515 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
5517 int i;
5518 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5519 bdrv_op_block(bs, i, reason);
5523 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
5525 int i;
5526 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5527 bdrv_op_unblock(bs, i, reason);
5531 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
5533 int i;
5535 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
5536 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
5537 return false;
5540 return true;
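
/* Illustrative sketch (hypothetical helpers): how a job might use the
 * blocker API above. The same Error object serves as the human-readable
 * reason and as the key for unblocking. */
static Error *example_blocker;

static void example_block_node(BlockDriverState *bs)
{
    error_setg(&example_blocker, "Node is in use by an example job");
    bdrv_op_block_all(bs, example_blocker);
}

static void example_unblock_node(BlockDriverState *bs)
{
    bdrv_op_unblock_all(bs, example_blocker);
    error_free(example_blocker);
    example_blocker = NULL;
}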
5543 void bdrv_iostatus_enable(BlockDriverState *bs)
5545 bs->iostatus_enabled = true;
5546 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
5549 /* The I/O status is only enabled if the drive explicitly
5550 * enables it _and_ the VM is configured to stop on errors */
5551 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
5553 return (bs->iostatus_enabled &&
5554 (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
5555 bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
5556 bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
5559 void bdrv_iostatus_disable(BlockDriverState *bs)
5561 bs->iostatus_enabled = false;
5564 void bdrv_iostatus_reset(BlockDriverState *bs)
5566 if (bdrv_iostatus_is_enabled(bs)) {
5567 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
5568 if (bs->job) {
5569 block_job_iostatus_reset(bs->job);
5574 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
5576 assert(bdrv_iostatus_is_enabled(bs));
5577 if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
5578 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
5579 BLOCK_DEVICE_IO_STATUS_FAILED;
5583 void bdrv_img_create(const char *filename, const char *fmt,
5584 const char *base_filename, const char *base_fmt,
5585 char *options, uint64_t img_size, int flags,
5586 Error **errp, bool quiet)
5588 QemuOptsList *create_opts = NULL;
5589 QemuOpts *opts = NULL;
5590 const char *backing_fmt, *backing_file;
5591 int64_t size;
5592 BlockDriver *drv, *proto_drv;
5593 BlockDriver *backing_drv = NULL;
5594 Error *local_err = NULL;
5595 int ret = 0;
5597 /* Find driver and parse its options */
5598 drv = bdrv_find_format(fmt);
5599 if (!drv) {
5600 error_setg(errp, "Unknown file format '%s'", fmt);
5601 return;
5604 proto_drv = bdrv_find_protocol(filename, true);
5605 if (!proto_drv) {
5606 error_setg(errp, "Unknown protocol '%s'", filename);
5607 return;
5610 create_opts = qemu_opts_append(create_opts, drv->create_opts);
5611 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
5613 /* Create parameter list with default values */
5614 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
5615 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);
5617 /* Parse -o options */
5618 if (options) {
5619 if (qemu_opts_do_parse(opts, options, NULL) != 0) {
5620 error_setg(errp, "Invalid options for file format '%s'", fmt);
5621 goto out;
5625 if (base_filename) {
5626 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
5627 error_setg(errp, "Backing file not supported for file format '%s'",
5628 fmt);
5629 goto out;
5633 if (base_fmt) {
5634 if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
5635 error_setg(errp, "Backing file format not supported for file "
5636 "format '%s'", fmt);
5637 goto out;
5641 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
5642 if (backing_file) {
5643 if (!strcmp(filename, backing_file)) {
5644 error_setg(errp, "Error: Trying to create an image with the "
5645 "same filename as the backing file");
5646 goto out;
5650 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
5651 if (backing_fmt) {
5652 backing_drv = bdrv_find_format(backing_fmt);
5653 if (!backing_drv) {
5654 error_setg(errp, "Unknown backing file format '%s'",
5655 backing_fmt);
5656 goto out;
5660 // The size for the image must always be specified, with one exception:
5661 // If we are using a backing file, we can obtain the size from there
5662 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
5663 if (size == -1) {
5664 if (backing_file) {
5665 BlockDriverState *bs;
5666             int64_t backing_size;
5667 int back_flags;
5669 /* backing files always opened read-only */
5670 back_flags =
5671 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
5673 bs = NULL;
5674 ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
5675 backing_drv, &local_err);
5676 if (ret < 0) {
5677 error_setg_errno(errp, -ret, "Could not open '%s': %s",
5678 backing_file,
5679 error_get_pretty(local_err));
5680 error_free(local_err);
5681 local_err = NULL;
5682 goto out;
5684             backing_size = bdrv_getlength(bs);
5685             if (backing_size < 0) {
5686                 error_setg_errno(errp, -backing_size, "Could not get size of '%s'",
5687                                  backing_file);
5688                 bdrv_unref(bs);
5689                 goto out;
5692             qemu_opt_set_number(opts, BLOCK_OPT_SIZE, backing_size);
5694 bdrv_unref(bs);
5695 } else {
5696 error_setg(errp, "Image creation needs a size parameter");
5697 goto out;
5701 if (!quiet) {
5702 printf("Formatting '%s', fmt=%s ", filename, fmt);
5703 qemu_opts_print(opts);
5704 puts("");
5707 ret = bdrv_create(drv, filename, opts, &local_err);
5709 if (ret == -EFBIG) {
5710 /* This is generally a better message than whatever the driver would
5711 * deliver (especially because of the cluster_size_hint), since that
5712 * is most probably not much different from "image too large". */
5713 const char *cluster_size_hint = "";
5714 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
5715 cluster_size_hint = " (try using a larger cluster size)";
5717 error_setg(errp, "The image size is too large for file format '%s'"
5718 "%s", fmt, cluster_size_hint);
5719 error_free(local_err);
5720 local_err = NULL;
5723 out:
5724 qemu_opts_del(opts);
5725 qemu_opts_free(create_opts);
5726 if (local_err) {
5727 error_propagate(errp, local_err);
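
/* Illustrative usage sketch (hypothetical helper, filename made up):
 * creating a 1 GiB qcow2 image much like qemu-img create would, passing
 * driver options as an option string. */
static void example_create_image(Error **errp)
{
    bdrv_img_create("/tmp/example.qcow2", "qcow2",
                    NULL, NULL,                   /* no backing file */
                    (char *)"cluster_size=65536", /* -o options */
                    1024 * 1024 * 1024,           /* 1 GiB */
                    0, errp, false);              /* flags, not quiet */
}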
5731 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
5733 return bs->aio_context;
5736 void bdrv_detach_aio_context(BlockDriverState *bs)
5738 BdrvAioNotifier *baf;
5740 if (!bs->drv) {
5741 return;
5744 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
5745 baf->detach_aio_context(baf->opaque);
5748 if (bs->io_limits_enabled) {
5749 throttle_detach_aio_context(&bs->throttle_state);
5751 if (bs->drv->bdrv_detach_aio_context) {
5752 bs->drv->bdrv_detach_aio_context(bs);
5754 if (bs->file) {
5755 bdrv_detach_aio_context(bs->file);
5757 if (bs->backing_hd) {
5758 bdrv_detach_aio_context(bs->backing_hd);
5761 bs->aio_context = NULL;
5764 void bdrv_attach_aio_context(BlockDriverState *bs,
5765 AioContext *new_context)
5767 BdrvAioNotifier *ban;
5769 if (!bs->drv) {
5770 return;
5773 bs->aio_context = new_context;
5775 if (bs->backing_hd) {
5776 bdrv_attach_aio_context(bs->backing_hd, new_context);
5778 if (bs->file) {
5779 bdrv_attach_aio_context(bs->file, new_context);
5781 if (bs->drv->bdrv_attach_aio_context) {
5782 bs->drv->bdrv_attach_aio_context(bs, new_context);
5784 if (bs->io_limits_enabled) {
5785 throttle_attach_aio_context(&bs->throttle_state, new_context);
5788 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
5789 ban->attached_aio_context(new_context, ban->opaque);
5793 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
5795 bdrv_drain_all(); /* ensure there are no in-flight requests */
5797 bdrv_detach_aio_context(bs);
5799 /* This function executes in the old AioContext so acquire the new one in
5800 * case it runs in a different thread.
5802 aio_context_acquire(new_context);
5803 bdrv_attach_aio_context(bs, new_context);
5804 aio_context_release(new_context);
5807 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
5808 void (*attached_aio_context)(AioContext *new_context, void *opaque),
5809 void (*detach_aio_context)(void *opaque), void *opaque)
5811 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
5812 *ban = (BdrvAioNotifier){
5813 .attached_aio_context = attached_aio_context,
5814 .detach_aio_context = detach_aio_context,
5815 .opaque = opaque
5818 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
5821 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
5822 void (*attached_aio_context)(AioContext *,
5823 void *),
5824 void (*detach_aio_context)(void *),
5825 void *opaque)
5827 BdrvAioNotifier *ban, *ban_next;
5829 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
5830 if (ban->attached_aio_context == attached_aio_context &&
5831 ban->detach_aio_context == detach_aio_context &&
5832 ban->opaque == opaque)
5834 QLIST_REMOVE(ban, list);
5835 g_free(ban);
5837 return;
5841 abort();
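
/* Illustrative sketch (hypothetical ExampleDev type and callbacks): keeping
 * a per-context resource in sync with a BDS via the notifier pair above. */
typedef struct ExampleDev {
    BlockDriverState *bs;
    QEMUBH *bh; /* lives in the BDS's current AioContext */
} ExampleDev;

static void example_dev_bh(void *opaque)
{
}

static void example_dev_attached_aio_context(AioContext *new_context,
                                             void *opaque)
{
    ExampleDev *d = opaque;
    d->bh = aio_bh_new(new_context, example_dev_bh, d);
}

static void example_dev_detach_aio_context(void *opaque)
{
    ExampleDev *d = opaque;
    qemu_bh_delete(d->bh);
    d->bh = NULL;
}

static void example_dev_realize(ExampleDev *d)
{
    bdrv_add_aio_context_notifier(d->bs, example_dev_attached_aio_context,
                                  example_dev_detach_aio_context, d);
}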
5844 void bdrv_add_before_write_notifier(BlockDriverState *bs,
5845 NotifierWithReturn *notifier)
5847 notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
5850 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts)
5852 if (!bs->drv->bdrv_amend_options) {
5853 return -ENOTSUP;
5855 return bs->drv->bdrv_amend_options(bs, opts);
5858 /* This function will be called by the bdrv_recurse_is_first_non_filter method
5859  * of block filters and by bdrv_is_first_non_filter.
5860  * It is used to test if the given bs is the candidate or to recurse further
5861  * into the node graph.
5863 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
5864 BlockDriverState *candidate)
5866     /* return false if the basic checks fail */
5867 if (!bs || !bs->drv) {
5868 return false;
5871     /* the code reached a non-filter driver -> check if the bs is
5872      * the same as the candidate. This is the recursion termination condition.
5874 if (!bs->drv->is_filter) {
5875 return bs == candidate;
5877 /* Down this path the driver is a block filter driver */
5879 /* If the block filter recursion method is defined use it to recurse down
5880 * the node graph.
5882 if (bs->drv->bdrv_recurse_is_first_non_filter) {
5883 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
5886     /* the driver is a block filter but does not allow recursion -> return false
5888 return false;
5891 /* This function checks if the candidate is the first non-filter bs down its
5892  * bs chain. Since we don't have pointers to parents it explores all bs chains
5893  * from the top. Some filters can choose not to pass down the recursion.
5895 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
5897 BlockDriverState *bs;
5899 /* walk down the bs forest recursively */
5900 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
5901 bool perm;
5903 /* try to recurse in this top level bs */
5904 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
5906 /* candidate is the first non filter */
5907 if (perm) {
5908 return true;
5912 return false;
5915 BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
5917 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
5918 if (!to_replace_bs) {
5919 error_setg(errp, "Node name '%s' not found", node_name);
5920 return NULL;
5923 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
5924 return NULL;
5927     /* To prevent data corruption, we don't want an arbitrary node of the BDS
5928      * chain to be replaced, only the topmost non-filter.
5929      * Another benefit is that this test excludes backing files, which are
5930      * blocked by the backing blockers.
5932 if (!bdrv_is_first_non_filter(to_replace_bs)) {
5933 error_setg(errp, "Only top most non filter can be replaced");
5934 return NULL;
5937 return to_replace_bs;
5940 void bdrv_io_plug(BlockDriverState *bs)
5942 BlockDriver *drv = bs->drv;
5943 if (drv && drv->bdrv_io_plug) {
5944 drv->bdrv_io_plug(bs);
5945 } else if (bs->file) {
5946 bdrv_io_plug(bs->file);
5950 void bdrv_io_unplug(BlockDriverState *bs)
5952 BlockDriver *drv = bs->drv;
5953 if (drv && drv->bdrv_io_unplug) {
5954 drv->bdrv_io_unplug(bs);
5955 } else if (bs->file) {
5956 bdrv_io_unplug(bs->file);
5960 void bdrv_flush_io_queue(BlockDriverState *bs)
5962 BlockDriver *drv = bs->drv;
5963 if (drv && drv->bdrv_flush_io_queue) {
5964 drv->bdrv_flush_io_queue(bs);
5965 } else if (bs->file) {
5966 bdrv_flush_io_queue(bs->file);
5970 static bool append_open_options(QDict *d, BlockDriverState *bs)
5972 const QDictEntry *entry;
5973 bool found_any = false;
5975 for (entry = qdict_first(bs->options); entry;
5976 entry = qdict_next(bs->options, entry))
5978 /* Only take options for this level and exclude all non-driver-specific
5979 * options */
5980 if (!strchr(qdict_entry_key(entry), '.') &&
5981 strcmp(qdict_entry_key(entry), "node-name"))
5983 qobject_incref(qdict_entry_value(entry));
5984 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
5985 found_any = true;
5989 return found_any;
5992 /* Updates the following BDS fields:
5993 * - exact_filename: A filename which may be used for opening a block device
5994 * which (mostly) equals the given BDS (even without any
5995 * other options; so reading and writing must return the same
5996 * results, but caching etc. may be different)
5997 * - full_open_options: Options which, when given when opening a block device
5998 * (without a filename), result in a BDS (mostly)
5999 * equalling the given one
6000 * - filename: If exact_filename is set, it is copied here. Otherwise,
6001 * full_open_options is converted to a JSON object, prefixed with
6002 * "json:" (for use through the JSON pseudo protocol) and put here.
6004 void bdrv_refresh_filename(BlockDriverState *bs)
6006 BlockDriver *drv = bs->drv;
6007 QDict *opts;
6009 if (!drv) {
6010 return;
6013 /* This BDS's file name will most probably depend on its file's name, so
6014 * refresh that first */
6015 if (bs->file) {
6016 bdrv_refresh_filename(bs->file);
6019 if (drv->bdrv_refresh_filename) {
6020 /* Obsolete information is of no use here, so drop the old file name
6021 * information before refreshing it */
6022 bs->exact_filename[0] = '\0';
6023 if (bs->full_open_options) {
6024 QDECREF(bs->full_open_options);
6025 bs->full_open_options = NULL;
6028 drv->bdrv_refresh_filename(bs);
6029 } else if (bs->file) {
6030 /* Try to reconstruct valid information from the underlying file */
6031 bool has_open_options;
6033 bs->exact_filename[0] = '\0';
6034 if (bs->full_open_options) {
6035 QDECREF(bs->full_open_options);
6036 bs->full_open_options = NULL;
6039 opts = qdict_new();
6040 has_open_options = append_open_options(opts, bs);
6042 /* If no specific options have been given for this BDS, the filename of
6043 * the underlying file should suffice for this one as well */
6044 if (bs->file->exact_filename[0] && !has_open_options) {
6045 strcpy(bs->exact_filename, bs->file->exact_filename);
6047 /* Reconstructing the full options QDict is simple for most format block
6048 * drivers, as long as the full options are known for the underlying
6049 * file BDS. The full options QDict of that file BDS should somehow
6050 * contain a representation of the filename, therefore the following
6051 * suffices without querying the (exact_)filename of this BDS. */
6052 if (bs->file->full_open_options) {
6053 qdict_put_obj(opts, "driver",
6054 QOBJECT(qstring_from_str(drv->format_name)));
6055 QINCREF(bs->file->full_open_options);
6056 qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
6058 bs->full_open_options = opts;
6059 } else {
6060 QDECREF(opts);
6062 } else if (!bs->full_open_options && qdict_size(bs->options)) {
6063 /* There is no underlying file BDS (at least referenced by BDS.file),
6064 * so the full options QDict should be equal to the options given
6065 * specifically for this block device when it was opened (plus the
6066 * driver specification).
6067 * Because those options don't change, there is no need to update
6068 * full_open_options when it's already set. */
6070 opts = qdict_new();
6071 append_open_options(opts, bs);
6072 qdict_put_obj(opts, "driver",
6073 QOBJECT(qstring_from_str(drv->format_name)));
6075 if (bs->exact_filename[0]) {
6076 /* This may not work for all block protocol drivers (some may
6077 * require this filename to be parsed), but we have to find some
6078 * default solution here, so just include it. If some block driver
6079 * does not support pure options without any filename at all or
6080 * needs some special format of the options QDict, it needs to
6081 * implement the driver-specific bdrv_refresh_filename() function.
6083 qdict_put_obj(opts, "filename",
6084 QOBJECT(qstring_from_str(bs->exact_filename)));
6087 bs->full_open_options = opts;
6090 if (bs->exact_filename[0]) {
6091 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
6092 } else if (bs->full_open_options) {
6093 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
6094 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
6095 qstring_get_str(json));
6096 QDECREF(json);
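
/* Illustrative example: a qcow2 BDS whose protocol layer has no plain
 * filename might end up with something like
 *
 *   filename = json:{"driver": "qcow2", "file": {...}}
 *
 * where the "file" entry is the protocol BDS's full_open_options; bdrv_open()
 * can parse such a string again via the JSON pseudo-protocol. */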
6100 /* The purpose of this accessor function is to allow device models to access
6101  * the BlockAcctStats structure embedded inside a BlockDriverState without
6102  * being aware of the BlockDriverState structure layout.
6103  * It will go away once the BlockAcctStats structure is moved inside the
6104  * device models.
6106 BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
6108 return &bs->stats;