virtio-blk: Functions for op blocker management
[qemu/ar7.git] / block.c
blob41ab00efeac07f0df6ef3ff9db48e9e29b8b3bc3
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qbool.h"
33 #include "qapi/qmp/qjson.h"
34 #include "sysemu/block-backend.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/notify.h"
37 #include "qemu/coroutine.h"
38 #include "block/qapi.h"
39 #include "qmp-commands.h"
40 #include "qemu/timer.h"
41 #include "qapi-event.h"
42 #include "block/throttle-groups.h"
44 #ifdef CONFIG_BSD
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <sys/ioctl.h>
48 #include <sys/queue.h>
49 #ifndef __DragonFly__
50 #include <sys/disk.h>
51 #endif
52 #endif
54 #ifdef _WIN32
55 #include <windows.h>
56 #endif
58 /**
59 * A BdrvDirtyBitmap can be in three possible states:
60 * (1) successor is NULL and disabled is false: full r/w mode
61 * (2) successor is NULL and disabled is true: read only mode ("disabled")
62 * (3) successor is set: frozen mode.
63 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
64 * or enabled. A frozen bitmap can only abdicate() or reclaim().
66 struct BdrvDirtyBitmap {
67 HBitmap *bitmap; /* Dirty sector bitmap implementation */
68 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
69 char *name; /* Optional non-empty unique ID */
70 int64_t size; /* Size of the bitmap (Number of sectors) */
71 bool disabled; /* Bitmap is read-only */
72 QLIST_ENTRY(BdrvDirtyBitmap) list;
75 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
77 struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
79 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
80 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
82 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
83 QLIST_HEAD_INITIALIZER(bdrv_drivers);
85 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
86 const char *reference, QDict *options, int flags,
87 BlockDriverState *parent,
88 const BdrvChildRole *child_role, Error **errp);
90 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
91 static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
93 /* If non-zero, use only whitelisted block drivers */
94 static int use_bdrv_whitelist;
96 #ifdef _WIN32
/*
 * Return non-zero if @filename begins with an ASCII letter followed by ':'
 * (for example "c:" or "D:\dir").
 */
static int is_windows_drive_prefix(const char *filename)
{
    return (((filename[0] >= 'a' && filename[0] <= 'z') ||
             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
            filename[1] == ':');
}
/*
 * Return 1 if @filename is exactly a drive specification ("X:") or starts
 * with the "\\.\" or "//./" prefix, 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) &&
        filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
114 #endif
116 size_t bdrv_opt_mem_align(BlockDriverState *bs)
118 if (!bs || !bs->drv) {
119 /* page size or 4k (hdd sector size) should be on the safe side */
120 return MAX(4096, getpagesize());
123 return bs->bl.opt_mem_alignment;
126 size_t bdrv_min_mem_align(BlockDriverState *bs)
128 if (!bs || !bs->drv) {
129 /* page size or 4k (hdd sector size) should be on the safe side */
130 return MAX(4096, getpagesize());
133 return bs->bl.min_mem_alignment;
/*
 * Check if the path starts with "<protocol>:".
 *
 * The first ':' must come before any path separator.  On Windows, drive
 * specifications are never treated as protocols.
 */
int path_has_protocol(const char *path)
{
    const char *p;

#ifdef _WIN32
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
    p = path + strcspn(path, ":/\\");
#else
    p = path + strcspn(path, ":/");
#endif

    return *p == ':';
}
/*
 * Return non-zero if @path is absolute: it starts with '/' (or, on Windows,
 * with '\\' or a drive specification).
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (*path == '/' || *path == '\\');
#else
    return (*path == '/');
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: just past the first ':' of base_path, if there is one */
        p = strchr(base_path, ':');
        if (p) {
            p++;
        } else {
            p = base_path;
        }
        /* p1: just past the last path separator of base_path, if any */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            if (!p1 || p2 > p1) {
                p1 = p2;
            }
        }
#endif
        if (p1) {
            p1++;
        } else {
            p1 = base_path;
        }
        /* keep the longer of the two prefixes */
        if (p1 > p) {
            p = p1;
        }
        len = p - base_path;
        if (len > dest_size - 1) {
            len = dest_size - 1;
        }
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
211 void bdrv_get_full_backing_filename_from_filename(const char *backed,
212 const char *backing,
213 char *dest, size_t sz,
214 Error **errp)
216 if (backing[0] == '\0' || path_has_protocol(backing) ||
217 path_is_absolute(backing))
219 pstrcpy(dest, sz, backing);
220 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
221 error_setg(errp, "Cannot use relative backing file names for '%s'",
222 backed);
223 } else {
224 path_combine(dest, sz, backed, backing);
228 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
229 Error **errp)
231 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
233 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
234 dest, sz, errp);
237 void bdrv_register(BlockDriver *bdrv)
239 bdrv_setup_io_funcs(bdrv);
241 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
244 BlockDriverState *bdrv_new_root(void)
246 BlockDriverState *bs = bdrv_new();
248 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
249 return bs;
252 BlockDriverState *bdrv_new(void)
254 BlockDriverState *bs;
255 int i;
257 bs = g_new0(BlockDriverState, 1);
258 QLIST_INIT(&bs->dirty_bitmaps);
259 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
260 QLIST_INIT(&bs->op_blockers[i]);
262 notifier_list_init(&bs->close_notifiers);
263 notifier_with_return_list_init(&bs->before_write_notifiers);
264 qemu_co_queue_init(&bs->throttled_reqs[0]);
265 qemu_co_queue_init(&bs->throttled_reqs[1]);
266 bs->refcnt = 1;
267 bs->aio_context = qemu_get_aio_context();
269 return bs;
272 void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
274 notifier_list_add(&bs->close_notifiers, notify);
277 BlockDriver *bdrv_find_format(const char *format_name)
279 BlockDriver *drv1;
280 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
281 if (!strcmp(drv1->format_name, format_name)) {
282 return drv1;
285 return NULL;
288 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
290 static const char *whitelist_rw[] = {
291 CONFIG_BDRV_RW_WHITELIST
293 static const char *whitelist_ro[] = {
294 CONFIG_BDRV_RO_WHITELIST
296 const char **p;
298 if (!whitelist_rw[0] && !whitelist_ro[0]) {
299 return 1; /* no whitelist, anything goes */
302 for (p = whitelist_rw; *p; p++) {
303 if (!strcmp(drv->format_name, *p)) {
304 return 1;
307 if (read_only) {
308 for (p = whitelist_ro; *p; p++) {
309 if (!strcmp(drv->format_name, *p)) {
310 return 1;
314 return 0;
317 typedef struct CreateCo {
318 BlockDriver *drv;
319 char *filename;
320 QemuOpts *opts;
321 int ret;
322 Error *err;
323 } CreateCo;
325 static void coroutine_fn bdrv_create_co_entry(void *opaque)
327 Error *local_err = NULL;
328 int ret;
330 CreateCo *cco = opaque;
331 assert(cco->drv);
333 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
334 if (local_err) {
335 error_propagate(&cco->err, local_err);
337 cco->ret = ret;
340 int bdrv_create(BlockDriver *drv, const char* filename,
341 QemuOpts *opts, Error **errp)
343 int ret;
345 Coroutine *co;
346 CreateCo cco = {
347 .drv = drv,
348 .filename = g_strdup(filename),
349 .opts = opts,
350 .ret = NOT_DONE,
351 .err = NULL,
354 if (!drv->bdrv_create) {
355 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
356 ret = -ENOTSUP;
357 goto out;
360 if (qemu_in_coroutine()) {
361 /* Fast-path if already in coroutine context */
362 bdrv_create_co_entry(&cco);
363 } else {
364 co = qemu_coroutine_create(bdrv_create_co_entry);
365 qemu_coroutine_enter(co, &cco);
366 while (cco.ret == NOT_DONE) {
367 aio_poll(qemu_get_aio_context(), true);
371 ret = cco.ret;
372 if (ret < 0) {
373 if (cco.err) {
374 error_propagate(errp, cco.err);
375 } else {
376 error_setg_errno(errp, -ret, "Could not create image");
380 out:
381 g_free(cco.filename);
382 return ret;
385 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
387 BlockDriver *drv;
388 Error *local_err = NULL;
389 int ret;
391 drv = bdrv_find_protocol(filename, true, errp);
392 if (drv == NULL) {
393 return -ENOENT;
396 ret = bdrv_create(drv, filename, opts, &local_err);
397 if (local_err) {
398 error_propagate(errp, local_err);
400 return ret;
404 * Try to get @bs's logical and physical block size.
405 * On success, store them in @bsz struct and return 0.
406 * On failure return -errno.
407 * @bs must not be empty.
409 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
411 BlockDriver *drv = bs->drv;
413 if (drv && drv->bdrv_probe_blocksizes) {
414 return drv->bdrv_probe_blocksizes(bs, bsz);
417 return -ENOTSUP;
421 * Try to get @bs's geometry (cyls, heads, sectors).
422 * On success, store them in @geo struct and return 0.
423 * On failure return -errno.
424 * @bs must not be empty.
426 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
428 BlockDriver *drv = bs->drv;
430 if (drv && drv->bdrv_probe_geometry) {
431 return drv->bdrv_probe_geometry(bs, geo);
434 return -ENOTSUP;
/*
 * Create a uniquely-named empty temporary file.
 * Return 0 upon success, otherwise a negative errno value.
 *
 * @filename receives the name of the file; @size is the size of that buffer.
 * On POSIX hosts the file is created in $TMPDIR, or /var/tmp if TMPDIR is
 * not set; -EOVERFLOW is returned when the buffer is too small.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so save close()'s error first */
        int saved_errno = errno;

        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
474 * Detect host devices. By convention, /dev/cdrom[N] is always
475 * recognized as a host CDROM.
477 static BlockDriver *find_hdev_driver(const char *filename)
479 int score_max = 0, score;
480 BlockDriver *drv = NULL, *d;
482 QLIST_FOREACH(d, &bdrv_drivers, list) {
483 if (d->bdrv_probe_device) {
484 score = d->bdrv_probe_device(filename);
485 if (score > score_max) {
486 score_max = score;
487 drv = d;
492 return drv;
495 BlockDriver *bdrv_find_protocol(const char *filename,
496 bool allow_protocol_prefix,
497 Error **errp)
499 BlockDriver *drv1;
500 char protocol[128];
501 int len;
502 const char *p;
504 /* TODO Drivers without bdrv_file_open must be specified explicitly */
507 * XXX(hch): we really should not let host device detection
508 * override an explicit protocol specification, but moving this
509 * later breaks access to device names with colons in them.
510 * Thanks to the brain-dead persistent naming schemes on udev-
511 * based Linux systems those actually are quite common.
513 drv1 = find_hdev_driver(filename);
514 if (drv1) {
515 return drv1;
518 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
519 return &bdrv_file;
522 p = strchr(filename, ':');
523 assert(p != NULL);
524 len = p - filename;
525 if (len > sizeof(protocol) - 1)
526 len = sizeof(protocol) - 1;
527 memcpy(protocol, filename, len);
528 protocol[len] = '\0';
529 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
530 if (drv1->protocol_name &&
531 !strcmp(drv1->protocol_name, protocol)) {
532 return drv1;
536 error_setg(errp, "Unknown protocol '%s'", protocol);
537 return NULL;
541 * Guess image format by probing its contents.
542 * This is not a good idea when your image is raw (CVE-2008-2004), but
543 * we do it anyway for backward compatibility.
545 * @buf contains the image's first @buf_size bytes.
546 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
547 * but can be smaller if the image file is smaller)
548 * @filename is its filename.
550 * For all block drivers, call the bdrv_probe() method to get its
551 * probing score.
552 * Return the first block driver with the highest probing score.
554 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
555 const char *filename)
557 int score_max = 0, score;
558 BlockDriver *drv = NULL, *d;
560 QLIST_FOREACH(d, &bdrv_drivers, list) {
561 if (d->bdrv_probe) {
562 score = d->bdrv_probe(buf, buf_size, filename);
563 if (score > score_max) {
564 score_max = score;
565 drv = d;
570 return drv;
573 static int find_image_format(BlockDriverState *bs, const char *filename,
574 BlockDriver **pdrv, Error **errp)
576 BlockDriver *drv;
577 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
578 int ret = 0;
580 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
581 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
582 *pdrv = &bdrv_raw;
583 return ret;
586 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
587 if (ret < 0) {
588 error_setg_errno(errp, -ret, "Could not read image for determining its "
589 "format");
590 *pdrv = NULL;
591 return ret;
594 drv = bdrv_probe_all(buf, ret, filename);
595 if (!drv) {
596 error_setg(errp, "Could not determine image format: No compatible "
597 "driver found");
598 ret = -ENOENT;
600 *pdrv = drv;
601 return ret;
605 * Set the current 'total_sectors' value
606 * Return 0 on success, -errno on error.
608 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
610 BlockDriver *drv = bs->drv;
612 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
613 if (bdrv_is_sg(bs))
614 return 0;
616 /* query actual device if possible, otherwise just trust the hint */
617 if (drv->bdrv_getlength) {
618 int64_t length = drv->bdrv_getlength(bs);
619 if (length < 0) {
620 return length;
622 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
625 bs->total_sectors = hint;
626 return 0;
630 * Combines a QDict of new block driver @options with any missing options taken
631 * from @old_options, so that leaving out an option defaults to its old value.
633 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
634 QDict *old_options)
636 if (bs->drv && bs->drv->bdrv_join_options) {
637 bs->drv->bdrv_join_options(options, old_options);
638 } else {
639 qdict_join(options, old_options, false);
644 * Set open flags for a given discard mode
646 * Return 0 on success, -1 if the discard mode was invalid.
648 int bdrv_parse_discard_flags(const char *mode, int *flags)
650 *flags &= ~BDRV_O_UNMAP;
652 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
653 /* do nothing */
654 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
655 *flags |= BDRV_O_UNMAP;
656 } else {
657 return -1;
660 return 0;
664 * Set open flags for a given cache mode
666 * Return 0 on success, -1 if the cache mode was invalid.
668 int bdrv_parse_cache_flags(const char *mode, int *flags)
670 *flags &= ~BDRV_O_CACHE_MASK;
672 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
673 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
674 } else if (!strcmp(mode, "directsync")) {
675 *flags |= BDRV_O_NOCACHE;
676 } else if (!strcmp(mode, "writeback")) {
677 *flags |= BDRV_O_CACHE_WB;
678 } else if (!strcmp(mode, "unsafe")) {
679 *flags |= BDRV_O_CACHE_WB;
680 *flags |= BDRV_O_NO_FLUSH;
681 } else if (!strcmp(mode, "writethrough")) {
682 /* this is the default */
683 } else {
684 return -1;
687 return 0;
691 * Returns the flags that a temporary snapshot should get, based on the
692 * originally requested flags (the originally requested image will have flags
693 * like a backing file)
695 static int bdrv_temp_snapshot_flags(int flags)
697 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
701 * Returns the options and flags that bs->file should get if a protocol driver
702 * is expected, based on the given options and flags for the parent BDS
704 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
705 int parent_flags, QDict *parent_options)
707 int flags = parent_flags;
709 /* Enable protocol handling, disable format probing for bs->file */
710 flags |= BDRV_O_PROTOCOL;
712 /* If the cache mode isn't explicitly set, inherit direct and no-flush from
713 * the parent. */
714 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
715 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
717 /* Our block drivers take care to send flushes and respect unmap policy,
718 * so we can default to enable both on lower layers regardless of the
719 * corresponding parent options. */
720 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
721 flags |= BDRV_O_UNMAP;
723 /* Clear flags that only apply to the top layer */
724 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
726 *child_flags = flags;
729 const BdrvChildRole child_file = {
730 .inherit_options = bdrv_inherited_options,
734 * Returns the options and flags that bs->file should get if the use of formats
735 * (and not only protocols) is permitted for it, based on the given options and
736 * flags for the parent BDS
738 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
739 int parent_flags, QDict *parent_options)
741 child_file.inherit_options(child_flags, child_options,
742 parent_flags, parent_options);
744 *child_flags &= ~BDRV_O_PROTOCOL;
747 const BdrvChildRole child_format = {
748 .inherit_options = bdrv_inherited_fmt_options,
752 * Returns the options and flags that bs->backing should get, based on the
753 * given options and flags for the parent BDS
755 static void bdrv_backing_options(int *child_flags, QDict *child_options,
756 int parent_flags, QDict *parent_options)
758 int flags = parent_flags;
760 /* The cache mode is inherited unmodified for backing files */
761 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_WB);
762 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
763 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
765 /* backing files always opened read-only */
766 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
768 /* snapshot=on is handled on the top layer */
769 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
771 *child_flags = flags;
774 static const BdrvChildRole child_backing = {
775 .inherit_options = bdrv_backing_options,
778 static int bdrv_open_flags(BlockDriverState *bs, int flags)
780 int open_flags = flags | BDRV_O_CACHE_WB;
783 * Clear flags that are internal to the block layer before opening the
784 * image.
786 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
789 * Snapshots should be writable.
791 if (flags & BDRV_O_TEMPORARY) {
792 open_flags |= BDRV_O_RDWR;
795 return open_flags;
798 static void update_flags_from_options(int *flags, QemuOpts *opts)
800 *flags &= ~BDRV_O_CACHE_MASK;
802 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
803 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
804 *flags |= BDRV_O_CACHE_WB;
807 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
808 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
809 *flags |= BDRV_O_NO_FLUSH;
812 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
813 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
814 *flags |= BDRV_O_NOCACHE;
818 static void update_options_from_flags(QDict *options, int flags)
820 if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
821 qdict_put(options, BDRV_OPT_CACHE_WB,
822 qbool_from_bool(flags & BDRV_O_CACHE_WB));
824 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
825 qdict_put(options, BDRV_OPT_CACHE_DIRECT,
826 qbool_from_bool(flags & BDRV_O_NOCACHE));
828 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
829 qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
830 qbool_from_bool(flags & BDRV_O_NO_FLUSH));
834 static void bdrv_assign_node_name(BlockDriverState *bs,
835 const char *node_name,
836 Error **errp)
838 char *gen_node_name = NULL;
840 if (!node_name) {
841 node_name = gen_node_name = id_generate(ID_BLOCK);
842 } else if (!id_wellformed(node_name)) {
844 * Check for empty string or invalid characters, but not if it is
845 * generated (generated names use characters not available to the user)
847 error_setg(errp, "Invalid node name");
848 return;
851 /* takes care of avoiding namespaces collisions */
852 if (blk_by_name(node_name)) {
853 error_setg(errp, "node-name=%s is conflicting with a device id",
854 node_name);
855 goto out;
858 /* takes care of avoiding duplicates node names */
859 if (bdrv_find_node(node_name)) {
860 error_setg(errp, "Duplicate node name");
861 goto out;
864 /* copy node name into the bs and insert it into the graph list */
865 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
866 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
867 out:
868 g_free(gen_node_name);
871 static QemuOptsList bdrv_runtime_opts = {
872 .name = "bdrv_common",
873 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
874 .desc = {
876 .name = "node-name",
877 .type = QEMU_OPT_STRING,
878 .help = "Node name of the block device node",
881 .name = "driver",
882 .type = QEMU_OPT_STRING,
883 .help = "Block driver to use for the node",
886 .name = BDRV_OPT_CACHE_WB,
887 .type = QEMU_OPT_BOOL,
888 .help = "Enable writeback mode",
891 .name = BDRV_OPT_CACHE_DIRECT,
892 .type = QEMU_OPT_BOOL,
893 .help = "Bypass software writeback cache on the host",
896 .name = BDRV_OPT_CACHE_NO_FLUSH,
897 .type = QEMU_OPT_BOOL,
898 .help = "Ignore flush requests",
900 { /* end of list */ }
905 * Common part for opening disk images and files
907 * Removes all processed options from *options.
909 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
910 QDict *options, Error **errp)
912 int ret, open_flags;
913 const char *filename;
914 const char *driver_name = NULL;
915 const char *node_name = NULL;
916 QemuOpts *opts;
917 BlockDriver *drv;
918 Error *local_err = NULL;
920 assert(bs->file == NULL);
921 assert(options != NULL && bs->options != options);
923 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
924 qemu_opts_absorb_qdict(opts, options, &local_err);
925 if (local_err) {
926 error_propagate(errp, local_err);
927 ret = -EINVAL;
928 goto fail_opts;
931 driver_name = qemu_opt_get(opts, "driver");
932 drv = bdrv_find_format(driver_name);
933 assert(drv != NULL);
935 if (file != NULL) {
936 filename = file->bs->filename;
937 } else {
938 filename = qdict_get_try_str(options, "filename");
941 if (drv->bdrv_needs_filename && !filename) {
942 error_setg(errp, "The '%s' block driver requires a file name",
943 drv->format_name);
944 ret = -EINVAL;
945 goto fail_opts;
948 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
949 drv->format_name);
951 node_name = qemu_opt_get(opts, "node-name");
952 bdrv_assign_node_name(bs, node_name, &local_err);
953 if (local_err) {
954 error_propagate(errp, local_err);
955 ret = -EINVAL;
956 goto fail_opts;
959 bs->request_alignment = 512;
960 bs->zero_beyond_eof = true;
961 bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
963 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
964 error_setg(errp,
965 !bs->read_only && bdrv_is_whitelisted(drv, true)
966 ? "Driver '%s' can only be used for read-only devices"
967 : "Driver '%s' is not whitelisted",
968 drv->format_name);
969 ret = -ENOTSUP;
970 goto fail_opts;
973 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
974 if (bs->open_flags & BDRV_O_COPY_ON_READ) {
975 if (!bs->read_only) {
976 bdrv_enable_copy_on_read(bs);
977 } else {
978 error_setg(errp, "Can't use copy-on-read on read-only device");
979 ret = -EINVAL;
980 goto fail_opts;
984 if (filename != NULL) {
985 pstrcpy(bs->filename, sizeof(bs->filename), filename);
986 } else {
987 bs->filename[0] = '\0';
989 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
991 bs->drv = drv;
992 bs->opaque = g_malloc0(drv->instance_size);
994 /* Apply cache mode options */
995 update_flags_from_options(&bs->open_flags, opts);
996 bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);
998 /* Open the image, either directly or using a protocol */
999 open_flags = bdrv_open_flags(bs, bs->open_flags);
1000 if (drv->bdrv_file_open) {
1001 assert(file == NULL);
1002 assert(!drv->bdrv_needs_filename || filename != NULL);
1003 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1004 } else {
1005 if (file == NULL) {
1006 error_setg(errp, "Can't use '%s' as a block driver for the "
1007 "protocol level", drv->format_name);
1008 ret = -EINVAL;
1009 goto free_and_fail;
1011 bs->file = file;
1012 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1015 if (ret < 0) {
1016 if (local_err) {
1017 error_propagate(errp, local_err);
1018 } else if (bs->filename[0]) {
1019 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1020 } else {
1021 error_setg_errno(errp, -ret, "Could not open image");
1023 goto free_and_fail;
1026 if (bs->encrypted) {
1027 error_report("Encrypted images are deprecated");
1028 error_printf("Support for them will be removed in a future release.\n"
1029 "You can use 'qemu-img convert' to convert your image"
1030 " to an unencrypted one.\n");
1033 ret = refresh_total_sectors(bs, bs->total_sectors);
1034 if (ret < 0) {
1035 error_setg_errno(errp, -ret, "Could not refresh total sector count");
1036 goto free_and_fail;
1039 bdrv_refresh_limits(bs, &local_err);
1040 if (local_err) {
1041 error_propagate(errp, local_err);
1042 ret = -EINVAL;
1043 goto free_and_fail;
1046 assert(bdrv_opt_mem_align(bs) != 0);
1047 assert(bdrv_min_mem_align(bs) != 0);
1048 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
1050 qemu_opts_del(opts);
1051 return 0;
1053 free_and_fail:
1054 bs->file = NULL;
1055 g_free(bs->opaque);
1056 bs->opaque = NULL;
1057 bs->drv = NULL;
1058 fail_opts:
1059 qemu_opts_del(opts);
1060 return ret;
1063 static QDict *parse_json_filename(const char *filename, Error **errp)
1065 QObject *options_obj;
1066 QDict *options;
1067 int ret;
1069 ret = strstart(filename, "json:", &filename);
1070 assert(ret);
1072 options_obj = qobject_from_json(filename);
1073 if (!options_obj) {
1074 error_setg(errp, "Could not parse the JSON options");
1075 return NULL;
1078 if (qobject_type(options_obj) != QTYPE_QDICT) {
1079 qobject_decref(options_obj);
1080 error_setg(errp, "Invalid JSON object given");
1081 return NULL;
1084 options = qobject_to_qdict(options_obj);
1085 qdict_flatten(options);
1087 return options;
1090 static void parse_json_protocol(QDict *options, const char **pfilename,
1091 Error **errp)
1093 QDict *json_options;
1094 Error *local_err = NULL;
1096 /* Parse json: pseudo-protocol */
1097 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1098 return;
1101 json_options = parse_json_filename(*pfilename, &local_err);
1102 if (local_err) {
1103 error_propagate(errp, local_err);
1104 return;
1107 /* Options given in the filename have lower priority than options
1108 * specified directly */
1109 qdict_join(options, json_options, false);
1110 QDECREF(json_options);
1111 *pfilename = NULL;
1115 * Fills in default options for opening images and converts the legacy
1116 * filename/flags pair to option QDict entries.
1117 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1118 * block driver has been specified explicitly.
1120 static int bdrv_fill_options(QDict **options, const char *filename,
1121 int *flags, Error **errp)
1123 const char *drvname;
1124 bool protocol = *flags & BDRV_O_PROTOCOL;
1125 bool parse_filename = false;
1126 BlockDriver *drv = NULL;
1127 Error *local_err = NULL;
1129 drvname = qdict_get_try_str(*options, "driver");
1130 if (drvname) {
1131 drv = bdrv_find_format(drvname);
1132 if (!drv) {
1133 error_setg(errp, "Unknown driver '%s'", drvname);
1134 return -ENOENT;
1136 /* If the user has explicitly specified the driver, this choice should
1137 * override the BDRV_O_PROTOCOL flag */
1138 protocol = drv->bdrv_file_open;
1141 if (protocol) {
1142 *flags |= BDRV_O_PROTOCOL;
1143 } else {
1144 *flags &= ~BDRV_O_PROTOCOL;
1147 /* Translate cache options from flags into options */
1148 update_options_from_flags(*options, *flags);
1150 /* Fetch the file name from the options QDict if necessary */
1151 if (protocol && filename) {
1152 if (!qdict_haskey(*options, "filename")) {
1153 qdict_put(*options, "filename", qstring_from_str(filename));
1154 parse_filename = true;
1155 } else {
1156 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1157 "the same time");
1158 return -EINVAL;
1162 /* Find the right block driver */
1163 filename = qdict_get_try_str(*options, "filename");
1165 if (!drvname && protocol) {
1166 if (filename) {
1167 drv = bdrv_find_protocol(filename, parse_filename, errp);
1168 if (!drv) {
1169 return -EINVAL;
1172 drvname = drv->format_name;
1173 qdict_put(*options, "driver", qstring_from_str(drvname));
1174 } else {
1175 error_setg(errp, "Must specify either driver or file");
1176 return -EINVAL;
1180 assert(drv || !protocol);
1182 /* Driver-specific filename parsing */
1183 if (drv && drv->bdrv_parse_filename && parse_filename) {
1184 drv->bdrv_parse_filename(filename, *options, &local_err);
1185 if (local_err) {
1186 error_propagate(errp, local_err);
1187 return -EINVAL;
1190 if (!drv->bdrv_needs_filename) {
1191 qdict_del(*options, "filename");
1195 if (runstate_check(RUN_STATE_INMIGRATE)) {
1196 *flags |= BDRV_O_INACTIVE;
1199 return 0;
1202 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1203 BlockDriverState *child_bs,
1204 const char *child_name,
1205 const BdrvChildRole *child_role)
1207 BdrvChild *child = g_new(BdrvChild, 1);
1208 *child = (BdrvChild) {
1209 .bs = child_bs,
1210 .name = g_strdup(child_name),
1211 .role = child_role,
1214 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1215 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1217 return child;
1220 static void bdrv_detach_child(BdrvChild *child)
1222 QLIST_REMOVE(child, next);
1223 QLIST_REMOVE(child, next_parent);
1224 g_free(child->name);
1225 g_free(child);
1228 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1230 BlockDriverState *child_bs;
1232 if (child == NULL) {
1233 return;
1236 if (child->bs->inherits_from == parent) {
1237 child->bs->inherits_from = NULL;
1240 child_bs = child->bs;
1241 bdrv_detach_child(child);
1242 bdrv_unref(child_bs);
1246 * Sets the backing file link of a BDS. A new reference is created; callers
1247 * which don't need their own reference any more must call bdrv_unref().
1249 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1251 if (backing_hd) {
1252 bdrv_ref(backing_hd);
1255 if (bs->backing) {
1256 assert(bs->backing_blocker);
1257 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1258 bdrv_unref_child(bs, bs->backing);
1259 } else if (backing_hd) {
1260 error_setg(&bs->backing_blocker,
1261 "node is used as backing hd of '%s'",
1262 bdrv_get_device_or_node_name(bs));
1265 if (!backing_hd) {
1266 error_free(bs->backing_blocker);
1267 bs->backing_blocker = NULL;
1268 bs->backing = NULL;
1269 goto out;
1271 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1272 bs->open_flags &= ~BDRV_O_NO_BACKING;
1273 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1274 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1275 backing_hd->drv ? backing_hd->drv->format_name : "");
1277 bdrv_op_block_all(backing_hd, bs->backing_blocker);
1278 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1279 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1280 bs->backing_blocker);
1281 out:
1282 bdrv_refresh_limits(bs, NULL);
1286 * Opens the backing file for a BlockDriverState if not yet open
1288 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1289 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1290 * itself, all options starting with "${bdref_key}." are considered part of the
1291 * BlockdevRef.
1293 * TODO Can this be unified with bdrv_open_image()?
1295 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1296 const char *bdref_key, Error **errp)
1298 char *backing_filename = g_malloc0(PATH_MAX);
1299 char *bdref_key_dot;
1300 const char *reference = NULL;
1301 int ret = 0;
1302 BlockDriverState *backing_hd;
1303 QDict *options;
1304 QDict *tmp_parent_options = NULL;
1305 Error *local_err = NULL;
1307 if (bs->backing != NULL) {
1308 goto free_exit;
1311 /* NULL means an empty set of options */
1312 if (parent_options == NULL) {
1313 tmp_parent_options = qdict_new();
1314 parent_options = tmp_parent_options;
1317 bs->open_flags &= ~BDRV_O_NO_BACKING;
1319 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1320 qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1321 g_free(bdref_key_dot);
1323 reference = qdict_get_try_str(parent_options, bdref_key);
1324 if (reference || qdict_haskey(options, "file.filename")) {
1325 backing_filename[0] = '\0';
1326 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1327 QDECREF(options);
1328 goto free_exit;
1329 } else {
1330 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1331 &local_err);
1332 if (local_err) {
1333 ret = -EINVAL;
1334 error_propagate(errp, local_err);
1335 QDECREF(options);
1336 goto free_exit;
1340 if (!bs->drv || !bs->drv->supports_backing) {
1341 ret = -EINVAL;
1342 error_setg(errp, "Driver doesn't support backing files");
1343 QDECREF(options);
1344 goto free_exit;
1347 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1348 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1351 backing_hd = NULL;
1352 ret = bdrv_open_inherit(&backing_hd,
1353 *backing_filename ? backing_filename : NULL,
1354 reference, options, 0, bs, &child_backing,
1355 errp);
1356 if (ret < 0) {
1357 bs->open_flags |= BDRV_O_NO_BACKING;
1358 error_prepend(errp, "Could not open backing file: ");
1359 goto free_exit;
1362 /* Hook up the backing file link; drop our reference, bs owns the
1363 * backing_hd reference now */
1364 bdrv_set_backing_hd(bs, backing_hd);
1365 bdrv_unref(backing_hd);
1367 qdict_del(parent_options, bdref_key);
1369 free_exit:
1370 g_free(backing_filename);
1371 QDECREF(tmp_parent_options);
1372 return ret;
1376 * Opens a disk image whose options are given as BlockdevRef in another block
1377 * device's options.
1379 * If allow_none is true, no image will be opened if filename is false and no
1380 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1382 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1383 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1384 * itself, all options starting with "${bdref_key}." are considered part of the
1385 * BlockdevRef.
1387 * The BlockdevRef will be removed from the options QDict.
1389 BdrvChild *bdrv_open_child(const char *filename,
1390 QDict *options, const char *bdref_key,
1391 BlockDriverState* parent,
1392 const BdrvChildRole *child_role,
1393 bool allow_none, Error **errp)
1395 BdrvChild *c = NULL;
1396 BlockDriverState *bs;
1397 QDict *image_options;
1398 int ret;
1399 char *bdref_key_dot;
1400 const char *reference;
1402 assert(child_role != NULL);
1404 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1405 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1406 g_free(bdref_key_dot);
1408 reference = qdict_get_try_str(options, bdref_key);
1409 if (!filename && !reference && !qdict_size(image_options)) {
1410 if (!allow_none) {
1411 error_setg(errp, "A block device must be specified for \"%s\"",
1412 bdref_key);
1414 QDECREF(image_options);
1415 goto done;
1418 bs = NULL;
1419 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1420 parent, child_role, errp);
1421 if (ret < 0) {
1422 goto done;
1425 c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1427 done:
1428 qdict_del(options, bdref_key);
1429 return c;
1432 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1434 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1435 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1436 int64_t total_size;
1437 QemuOpts *opts = NULL;
1438 QDict *snapshot_options;
1439 BlockDriverState *bs_snapshot;
1440 Error *local_err = NULL;
1441 int ret;
1443 /* if snapshot, we create a temporary backing file and open it
1444 instead of opening 'filename' directly */
1446 /* Get the required size from the image */
1447 total_size = bdrv_getlength(bs);
1448 if (total_size < 0) {
1449 ret = total_size;
1450 error_setg_errno(errp, -total_size, "Could not get image size");
1451 goto out;
1454 /* Create the temporary image */
1455 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1456 if (ret < 0) {
1457 error_setg_errno(errp, -ret, "Could not get temporary filename");
1458 goto out;
1461 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1462 &error_abort);
1463 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1464 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1465 qemu_opts_del(opts);
1466 if (ret < 0) {
1467 error_prepend(errp, "Could not create temporary overlay '%s': ",
1468 tmp_filename);
1469 goto out;
1472 /* Prepare a new options QDict for the temporary file */
1473 snapshot_options = qdict_new();
1474 qdict_put(snapshot_options, "file.driver",
1475 qstring_from_str("file"));
1476 qdict_put(snapshot_options, "file.filename",
1477 qstring_from_str(tmp_filename));
1478 qdict_put(snapshot_options, "driver",
1479 qstring_from_str("qcow2"));
1481 bs_snapshot = bdrv_new();
1483 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1484 flags, &local_err);
1485 if (ret < 0) {
1486 error_propagate(errp, local_err);
1487 goto out;
1490 bdrv_append(bs_snapshot, bs);
1492 out:
1493 g_free(tmp_filename);
1494 return ret;
1498 * Opens a disk image (raw, qcow2, vmdk, ...)
1500 * options is a QDict of options to pass to the block drivers, or NULL for an
1501 * empty set of options. The reference to the QDict belongs to the block layer
1502 * after the call (even on failure), so if the caller intends to reuse the
1503 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1505 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1506 * If it is not NULL, the referenced BDS will be reused.
1508 * The reference parameter may be used to specify an existing block device which
1509 * should be opened. If specified, neither options nor a filename may be given,
1510 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1512 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1513 const char *reference, QDict *options, int flags,
1514 BlockDriverState *parent,
1515 const BdrvChildRole *child_role, Error **errp)
1517 int ret;
1518 BdrvChild *file = NULL;
1519 BlockDriverState *bs;
1520 BlockDriver *drv = NULL;
1521 const char *drvname;
1522 const char *backing;
1523 Error *local_err = NULL;
1524 int snapshot_flags = 0;
1526 assert(pbs);
1527 assert(!child_role || !flags);
1528 assert(!child_role == !parent);
1530 if (reference) {
1531 bool options_non_empty = options ? qdict_size(options) : false;
1532 QDECREF(options);
1534 if (*pbs) {
1535 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1536 "another block device");
1537 return -EINVAL;
1540 if (filename || options_non_empty) {
1541 error_setg(errp, "Cannot reference an existing block device with "
1542 "additional options or a new filename");
1543 return -EINVAL;
1546 bs = bdrv_lookup_bs(reference, reference, errp);
1547 if (!bs) {
1548 return -ENODEV;
1550 bdrv_ref(bs);
1551 *pbs = bs;
1552 return 0;
1555 if (*pbs) {
1556 bs = *pbs;
1557 } else {
1558 bs = bdrv_new();
1561 /* NULL means an empty set of options */
1562 if (options == NULL) {
1563 options = qdict_new();
1566 /* json: syntax counts as explicit options, as if in the QDict */
1567 parse_json_protocol(options, &filename, &local_err);
1568 if (local_err) {
1569 ret = -EINVAL;
1570 goto fail;
1573 bs->explicit_options = qdict_clone_shallow(options);
1575 if (child_role) {
1576 bs->inherits_from = parent;
1577 child_role->inherit_options(&flags, options,
1578 parent->open_flags, parent->options);
1581 ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1582 if (local_err) {
1583 goto fail;
1586 bs->open_flags = flags;
1587 bs->options = options;
1588 options = qdict_clone_shallow(options);
1590 /* Find the right image format driver */
1591 drvname = qdict_get_try_str(options, "driver");
1592 if (drvname) {
1593 drv = bdrv_find_format(drvname);
1594 if (!drv) {
1595 error_setg(errp, "Unknown driver: '%s'", drvname);
1596 ret = -EINVAL;
1597 goto fail;
1601 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1603 backing = qdict_get_try_str(options, "backing");
1604 if (backing && *backing == '\0') {
1605 flags |= BDRV_O_NO_BACKING;
1606 qdict_del(options, "backing");
1609 /* Open image file without format layer */
1610 if ((flags & BDRV_O_PROTOCOL) == 0) {
1611 if (flags & BDRV_O_RDWR) {
1612 flags |= BDRV_O_ALLOW_RDWR;
1614 if (flags & BDRV_O_SNAPSHOT) {
1615 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1616 bdrv_backing_options(&flags, options, flags, options);
1619 bs->open_flags = flags;
1621 file = bdrv_open_child(filename, options, "file", bs,
1622 &child_file, true, &local_err);
1623 if (local_err) {
1624 ret = -EINVAL;
1625 goto fail;
1629 /* Image format probing */
1630 bs->probed = !drv;
1631 if (!drv && file) {
1632 ret = find_image_format(file->bs, filename, &drv, &local_err);
1633 if (ret < 0) {
1634 goto fail;
1637 * This option update would logically belong in bdrv_fill_options(),
1638 * but we first need to open bs->file for the probing to work, while
1639 * opening bs->file already requires the (mostly) final set of options
1640 * so that cache mode etc. can be inherited.
1642 * Adding the driver later is somewhat ugly, but it's not an option
1643 * that would ever be inherited, so it's correct. We just need to make
1644 * sure to update both bs->options (which has the full effective
1645 * options for bs) and options (which has file.* already removed).
1647 qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1648 qdict_put(options, "driver", qstring_from_str(drv->format_name));
1649 } else if (!drv) {
1650 error_setg(errp, "Must specify either driver or file");
1651 ret = -EINVAL;
1652 goto fail;
1655 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1656 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1657 /* file must be NULL if a protocol BDS is about to be created
1658 * (the inverse results in an error message from bdrv_open_common()) */
1659 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1661 /* Open the image */
1662 ret = bdrv_open_common(bs, file, options, &local_err);
1663 if (ret < 0) {
1664 goto fail;
1667 if (file && (bs->file != file)) {
1668 bdrv_unref_child(bs, file);
1669 file = NULL;
1672 /* If there is a backing file, use it */
1673 if ((flags & BDRV_O_NO_BACKING) == 0) {
1674 ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1675 if (ret < 0) {
1676 goto close_and_fail;
1680 bdrv_refresh_filename(bs);
1682 /* Check if any unknown options were used */
1683 if (options && (qdict_size(options) != 0)) {
1684 const QDictEntry *entry = qdict_first(options);
1685 if (flags & BDRV_O_PROTOCOL) {
1686 error_setg(errp, "Block protocol '%s' doesn't support the option "
1687 "'%s'", drv->format_name, entry->key);
1688 } else {
1689 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1690 "support the option '%s'", drv->format_name,
1691 bdrv_get_device_name(bs), entry->key);
1694 ret = -EINVAL;
1695 goto close_and_fail;
1698 if (!bdrv_key_required(bs)) {
1699 if (bs->blk) {
1700 blk_dev_change_media_cb(bs->blk, true);
1702 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1703 && !runstate_check(RUN_STATE_INMIGRATE)
1704 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1705 error_setg(errp,
1706 "Guest must be stopped for opening of encrypted image");
1707 ret = -EBUSY;
1708 goto close_and_fail;
1711 QDECREF(options);
1712 *pbs = bs;
1714 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1715 * temporary snapshot afterwards. */
1716 if (snapshot_flags) {
1717 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1718 if (local_err) {
1719 goto close_and_fail;
1723 return 0;
1725 fail:
1726 if (file != NULL) {
1727 bdrv_unref_child(bs, file);
1729 QDECREF(bs->explicit_options);
1730 QDECREF(bs->options);
1731 QDECREF(options);
1732 bs->options = NULL;
1733 if (!*pbs) {
1734 /* If *pbs is NULL, a new BDS has been created in this function and
1735 needs to be freed now. Otherwise, it does not need to be closed,
1736 since it has not really been opened yet. */
1737 bdrv_unref(bs);
1739 if (local_err) {
1740 error_propagate(errp, local_err);
1742 return ret;
1744 close_and_fail:
1745 /* See fail path, but now the BDS has to be always closed */
1746 if (*pbs) {
1747 bdrv_close(bs);
1748 } else {
1749 bdrv_unref(bs);
1751 QDECREF(options);
1752 if (local_err) {
1753 error_propagate(errp, local_err);
1755 return ret;
1758 int bdrv_open(BlockDriverState **pbs, const char *filename,
1759 const char *reference, QDict *options, int flags, Error **errp)
1761 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1762 NULL, errp);
1765 typedef struct BlockReopenQueueEntry {
1766 bool prepared;
1767 BDRVReopenState state;
1768 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1769 } BlockReopenQueueEntry;
1772 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1773 * reopen of multiple devices.
1775 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1776 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1777 * be created and initialized. This newly created BlockReopenQueue should be
1778 * passed back in for subsequent calls that are intended to be of the same
1779 * atomic 'set'.
1781 * bs is the BlockDriverState to add to the reopen queue.
1783 * options contains the changed options for the associated bs
1784 * (the BlockReopenQueue takes ownership)
1786 * flags contains the open flags for the associated bs
1788 * returns a pointer to bs_queue, which is either the newly allocated
1789 * bs_queue, or the existing bs_queue being used.
1792 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1793 BlockDriverState *bs,
1794 QDict *options,
1795 int flags,
1796 const BdrvChildRole *role,
1797 QDict *parent_options,
1798 int parent_flags)
1800 assert(bs != NULL);
1802 BlockReopenQueueEntry *bs_entry;
1803 BdrvChild *child;
1804 QDict *old_options, *explicit_options;
1806 if (bs_queue == NULL) {
1807 bs_queue = g_new0(BlockReopenQueue, 1);
1808 QSIMPLEQ_INIT(bs_queue);
1811 if (!options) {
1812 options = qdict_new();
1816 * Precedence of options:
1817 * 1. Explicitly passed in options (highest)
1818 * 2. Set in flags (only for top level)
1819 * 3. Retained from explicitly set options of bs
1820 * 4. Inherited from parent node
1821 * 5. Retained from effective options of bs
1824 if (!parent_options) {
1826 * Any setting represented by flags is always updated. If the
1827 * corresponding QDict option is set, it takes precedence. Otherwise
1828 * the flag is translated into a QDict option. The old setting of bs is
1829 * not considered.
1831 update_options_from_flags(options, flags);
1834 /* Old explicitly set values (don't overwrite by inherited value) */
1835 old_options = qdict_clone_shallow(bs->explicit_options);
1836 bdrv_join_options(bs, options, old_options);
1837 QDECREF(old_options);
1839 explicit_options = qdict_clone_shallow(options);
1841 /* Inherit from parent node */
1842 if (parent_options) {
1843 assert(!flags);
1844 role->inherit_options(&flags, options, parent_flags, parent_options);
1847 /* Old values are used for options that aren't set yet */
1848 old_options = qdict_clone_shallow(bs->options);
1849 bdrv_join_options(bs, options, old_options);
1850 QDECREF(old_options);
1852 /* bdrv_open() masks this flag out */
1853 flags &= ~BDRV_O_PROTOCOL;
1855 QLIST_FOREACH(child, &bs->children, next) {
1856 QDict *new_child_options;
1857 char *child_key_dot;
1859 /* reopen can only change the options of block devices that were
1860 * implicitly created and inherited options. For other (referenced)
1861 * block devices, a syntax like "backing.foo" results in an error. */
1862 if (child->bs->inherits_from != bs) {
1863 continue;
1866 child_key_dot = g_strdup_printf("%s.", child->name);
1867 qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1868 g_free(child_key_dot);
1870 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1871 child->role, options, flags);
1874 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1875 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1877 bs_entry->state.bs = bs;
1878 bs_entry->state.options = options;
1879 bs_entry->state.explicit_options = explicit_options;
1880 bs_entry->state.flags = flags;
1882 return bs_queue;
1885 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1886 BlockDriverState *bs,
1887 QDict *options, int flags)
1889 return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1890 NULL, NULL, 0);
1894 * Reopen multiple BlockDriverStates atomically & transactionally.
1896 * The queue passed in (bs_queue) must have been built up previous
1897 * via bdrv_reopen_queue().
1899 * Reopens all BDS specified in the queue, with the appropriate
1900 * flags. All devices are prepared for reopen, and failure of any
1901 * device will cause all device changes to be abandonded, and intermediate
1902 * data cleaned up.
1904 * If all devices prepare successfully, then the changes are committed
1905 * to all devices.
1908 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1910 int ret = -1;
1911 BlockReopenQueueEntry *bs_entry, *next;
1912 Error *local_err = NULL;
1914 assert(bs_queue != NULL);
1916 bdrv_drain_all();
1918 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1919 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1920 error_propagate(errp, local_err);
1921 goto cleanup;
1923 bs_entry->prepared = true;
1926 /* If we reach this point, we have success and just need to apply the
1927 * changes
1929 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1930 bdrv_reopen_commit(&bs_entry->state);
1933 ret = 0;
1935 cleanup:
1936 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1937 if (ret && bs_entry->prepared) {
1938 bdrv_reopen_abort(&bs_entry->state);
1939 } else if (ret) {
1940 QDECREF(bs_entry->state.explicit_options);
1942 QDECREF(bs_entry->state.options);
1943 g_free(bs_entry);
1945 g_free(bs_queue);
1946 return ret;
1950 /* Reopen a single BlockDriverState with the specified flags. */
1951 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1953 int ret = -1;
1954 Error *local_err = NULL;
1955 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1957 ret = bdrv_reopen_multiple(queue, &local_err);
1958 if (local_err != NULL) {
1959 error_propagate(errp, local_err);
1961 return ret;
1966 * Prepares a BlockDriverState for reopen. All changes are staged in the
1967 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1968 * the block driver layer .bdrv_reopen_prepare()
1970 * bs is the BlockDriverState to reopen
1971 * flags are the new open flags
1972 * queue is the reopen queue
1974 * Returns 0 on success, non-zero on error. On error errp will be set
1975 * as well.
1977 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1978 * It is the responsibility of the caller to then call the abort() or
1979 * commit() for any other BDS that have been left in a prepare() state
1982 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1983 Error **errp)
1985 int ret = -1;
1986 Error *local_err = NULL;
1987 BlockDriver *drv;
1988 QemuOpts *opts;
1989 const char *value;
1991 assert(reopen_state != NULL);
1992 assert(reopen_state->bs->drv != NULL);
1993 drv = reopen_state->bs->drv;
1995 /* Process generic block layer options */
1996 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1997 qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1998 if (local_err) {
1999 error_propagate(errp, local_err);
2000 ret = -EINVAL;
2001 goto error;
2004 update_flags_from_options(&reopen_state->flags, opts);
2006 /* If a guest device is attached, it owns WCE */
2007 if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2008 bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2009 bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2010 if (old_wce != new_wce) {
2011 error_setg(errp, "Cannot change cache.writeback: Device attached");
2012 ret = -EINVAL;
2013 goto error;
2017 /* node-name and driver must be unchanged. Put them back into the QDict, so
2018 * that they are checked at the end of this function. */
2019 value = qemu_opt_get(opts, "node-name");
2020 if (value) {
2021 qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2024 value = qemu_opt_get(opts, "driver");
2025 if (value) {
2026 qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2029 /* if we are to stay read-only, do not allow permission change
2030 * to r/w */
2031 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2032 reopen_state->flags & BDRV_O_RDWR) {
2033 error_setg(errp, "Node '%s' is read only",
2034 bdrv_get_device_or_node_name(reopen_state->bs));
2035 goto error;
2039 ret = bdrv_flush(reopen_state->bs);
2040 if (ret) {
2041 error_setg_errno(errp, -ret, "Error flushing drive");
2042 goto error;
2045 if (drv->bdrv_reopen_prepare) {
2046 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2047 if (ret) {
2048 if (local_err != NULL) {
2049 error_propagate(errp, local_err);
2050 } else {
2051 error_setg(errp, "failed while preparing to reopen image '%s'",
2052 reopen_state->bs->filename);
2054 goto error;
2056 } else {
2057 /* It is currently mandatory to have a bdrv_reopen_prepare()
2058 * handler for each supported drv. */
2059 error_setg(errp, "Block format '%s' used by node '%s' "
2060 "does not support reopening files", drv->format_name,
2061 bdrv_get_device_or_node_name(reopen_state->bs));
2062 ret = -1;
2063 goto error;
2066 /* Options that are not handled are only okay if they are unchanged
2067 * compared to the old state. It is expected that some options are only
2068 * used for the initial open, but not reopen (e.g. filename) */
2069 if (qdict_size(reopen_state->options)) {
2070 const QDictEntry *entry = qdict_first(reopen_state->options);
2072 do {
2073 QString *new_obj = qobject_to_qstring(entry->value);
2074 const char *new = qstring_get_str(new_obj);
2075 const char *old = qdict_get_try_str(reopen_state->bs->options,
2076 entry->key);
2078 if (!old || strcmp(new, old)) {
2079 error_setg(errp, "Cannot change the option '%s'", entry->key);
2080 ret = -EINVAL;
2081 goto error;
2083 } while ((entry = qdict_next(reopen_state->options, entry)));
2086 ret = 0;
2088 error:
2089 qemu_opts_del(opts);
2090 return ret;
2094 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2095 * makes them final by swapping the staging BlockDriverState contents into
2096 * the active BlockDriverState contents.
2098 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2100 BlockDriver *drv;
2102 assert(reopen_state != NULL);
2103 drv = reopen_state->bs->drv;
2104 assert(drv != NULL);
2106 /* If there are any driver level actions to take */
2107 if (drv->bdrv_reopen_commit) {
2108 drv->bdrv_reopen_commit(reopen_state);
2111 /* set BDS specific flags now */
2112 QDECREF(reopen_state->bs->explicit_options);
2114 reopen_state->bs->explicit_options = reopen_state->explicit_options;
2115 reopen_state->bs->open_flags = reopen_state->flags;
2116 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2117 BDRV_O_CACHE_WB);
2118 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2120 bdrv_refresh_limits(reopen_state->bs, NULL);
2124 * Abort the reopen, and delete and free the staged changes in
2125 * reopen_state
2127 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2129 BlockDriver *drv;
2131 assert(reopen_state != NULL);
2132 drv = reopen_state->bs->drv;
2133 assert(drv != NULL);
2135 if (drv->bdrv_reopen_abort) {
2136 drv->bdrv_reopen_abort(reopen_state);
2139 QDECREF(reopen_state->explicit_options);
2143 void bdrv_close(BlockDriverState *bs)
2145 BdrvAioNotifier *ban, *ban_next;
2147 if (bs->job) {
2148 block_job_cancel_sync(bs->job);
2151 /* Disable I/O limits and drain all pending throttled requests */
2152 if (bs->throttle_state) {
2153 bdrv_io_limits_disable(bs);
2156 bdrv_drained_begin(bs); /* complete I/O */
2157 bdrv_flush(bs);
2158 bdrv_drain(bs); /* in case flush left pending I/O */
2160 notifier_list_notify(&bs->close_notifiers, bs);
2162 bdrv_release_named_dirty_bitmaps(bs);
2163 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2165 if (bs->blk) {
2166 blk_dev_change_media_cb(bs->blk, false);
2169 if (bs->drv) {
2170 BdrvChild *child, *next;
2172 bs->drv->bdrv_close(bs);
2173 bs->drv = NULL;
2175 bdrv_set_backing_hd(bs, NULL);
2177 if (bs->file != NULL) {
2178 bdrv_unref_child(bs, bs->file);
2179 bs->file = NULL;
2182 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2183 /* TODO Remove bdrv_unref() from drivers' close function and use
2184 * bdrv_unref_child() here */
2185 if (child->bs->inherits_from == bs) {
2186 child->bs->inherits_from = NULL;
2188 bdrv_detach_child(child);
2191 g_free(bs->opaque);
2192 bs->opaque = NULL;
2193 bs->copy_on_read = 0;
2194 bs->backing_file[0] = '\0';
2195 bs->backing_format[0] = '\0';
2196 bs->total_sectors = 0;
2197 bs->encrypted = 0;
2198 bs->valid_key = 0;
2199 bs->sg = 0;
2200 bs->zero_beyond_eof = false;
2201 QDECREF(bs->options);
2202 QDECREF(bs->explicit_options);
2203 bs->options = NULL;
2204 QDECREF(bs->full_open_options);
2205 bs->full_open_options = NULL;
2208 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2209 g_free(ban);
2211 QLIST_INIT(&bs->aio_notifiers);
2212 bdrv_drained_end(bs);
2215 void bdrv_close_all(void)
2217 BlockDriverState *bs;
2219 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2220 AioContext *aio_context = bdrv_get_aio_context(bs);
2222 aio_context_acquire(aio_context);
2223 bdrv_close(bs);
2224 aio_context_release(aio_context);
2228 /* make a BlockDriverState anonymous by removing from bdrv_state and
2229 * graph_bdrv_state list.
2230 Also, NULL terminate the device_name to prevent double remove */
2231 void bdrv_make_anon(BlockDriverState *bs)
2234 * Take care to remove bs from bdrv_states only when it's actually
2235 * in it. Note that bs->device_list.tqe_prev is initially null,
2236 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
2237 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2238 * resetting it to null on remove.
2240 if (bs->device_list.tqe_prev) {
2241 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2242 bs->device_list.tqe_prev = NULL;
2244 if (bs->node_name[0] != '\0') {
2245 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2247 bs->node_name[0] = '\0';
2250 /* Fields that need to stay with the top-level BDS */
2251 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2252 BlockDriverState *bs_src)
2254 /* move some fields that need to stay attached to the device */
2256 /* dev info */
2257 bs_dest->copy_on_read = bs_src->copy_on_read;
2259 bs_dest->enable_write_cache = bs_src->enable_write_cache;
2261 /* dirty bitmap */
2262 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
2265 static void change_parent_backing_link(BlockDriverState *from,
2266 BlockDriverState *to)
2268 BdrvChild *c, *next;
2270 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2271 assert(c->role != &child_backing);
2272 c->bs = to;
2273 QLIST_REMOVE(c, next_parent);
2274 QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2275 bdrv_ref(to);
2276 bdrv_unref(from);
2278 if (from->blk) {
2279 blk_set_bs(from->blk, to);
2280 if (!to->device_list.tqe_prev) {
2281 QTAILQ_INSERT_BEFORE(from, to, device_list);
2283 QTAILQ_REMOVE(&bdrv_states, from, device_list);
2287 static void swap_feature_fields(BlockDriverState *bs_top,
2288 BlockDriverState *bs_new)
2290 BlockDriverState tmp;
2292 bdrv_move_feature_fields(&tmp, bs_top);
2293 bdrv_move_feature_fields(bs_top, bs_new);
2294 bdrv_move_feature_fields(bs_new, &tmp);
2296 assert(!bs_new->throttle_state);
2297 if (bs_top->throttle_state) {
2298 assert(bs_top->io_limits_enabled);
2299 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2300 bdrv_io_limits_disable(bs_top);
2305 * Add new bs contents at the top of an image chain while the chain is
2306 * live, while keeping required fields on the top layer.
2308 * This will modify the BlockDriverState fields, and swap contents
2309 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2311 * bs_new must not be attached to a BlockBackend.
2313 * This function does not create any image files.
2315 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2316 * that's what the callers commonly need. bs_new will be referenced by the old
2317 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2318 * reference of its own, it must call bdrv_ref().
2320 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2322 assert(!bdrv_requests_pending(bs_top));
2323 assert(!bdrv_requests_pending(bs_new));
2325 bdrv_ref(bs_top);
2326 change_parent_backing_link(bs_top, bs_new);
2328 /* Some fields always stay on top of the backing file chain */
2329 swap_feature_fields(bs_top, bs_new);
2331 bdrv_set_backing_hd(bs_new, bs_top);
2332 bdrv_unref(bs_top);
2334 /* bs_new is now referenced by its new parents, we don't need the
2335 * additional reference any more. */
2336 bdrv_unref(bs_new);
2339 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2341 assert(!bdrv_requests_pending(old));
2342 assert(!bdrv_requests_pending(new));
2344 bdrv_ref(old);
2346 if (old->blk) {
2347 /* As long as these fields aren't in BlockBackend, but in the top-level
2348 * BlockDriverState, it's not possible for a BDS to have two BBs.
2350 * We really want to copy the fields from old to new, but we go for a
2351 * swap instead so that pointers aren't duplicated and cause trouble.
2352 * (Also, bdrv_swap() used to do the same.) */
2353 assert(!new->blk);
2354 swap_feature_fields(old, new);
2356 change_parent_backing_link(old, new);
2358 /* Change backing files if a previously independent node is added to the
2359 * chain. For active commit, we replace top by its own (indirect) backing
2360 * file and don't do anything here so we don't build a loop. */
2361 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2362 bdrv_set_backing_hd(new, backing_bs(old));
2363 bdrv_set_backing_hd(old, NULL);
2366 bdrv_unref(old);
2369 static void bdrv_delete(BlockDriverState *bs)
2371 assert(!bs->job);
2372 assert(bdrv_op_blocker_is_empty(bs));
2373 assert(!bs->refcnt);
2375 bdrv_close(bs);
2377 /* remove from list, if necessary */
2378 bdrv_make_anon(bs);
2380 g_free(bs);
2384 * Run consistency checks on an image
2386 * Returns 0 if the check could be completed (it doesn't mean that the image is
2387 * free of errors) or -errno when an internal error occurred. The results of the
2388 * check are stored in res.
2390 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2392 if (bs->drv == NULL) {
2393 return -ENOMEDIUM;
2395 if (bs->drv->bdrv_check == NULL) {
2396 return -ENOTSUP;
2399 memset(res, 0, sizeof(*res));
2400 return bs->drv->bdrv_check(bs, res, fix);
2403 #define COMMIT_BUF_SECTORS 2048
2405 /* commit COW file into the raw image */
2406 int bdrv_commit(BlockDriverState *bs)
2408 BlockDriver *drv = bs->drv;
2409 int64_t sector, total_sectors, length, backing_length;
2410 int n, ro, open_flags;
2411 int ret = 0;
2412 uint8_t *buf = NULL;
2414 if (!drv)
2415 return -ENOMEDIUM;
2417 if (!bs->backing) {
2418 return -ENOTSUP;
2421 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2422 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2423 return -EBUSY;
2426 ro = bs->backing->bs->read_only;
2427 open_flags = bs->backing->bs->open_flags;
2429 if (ro) {
2430 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2431 return -EACCES;
2435 length = bdrv_getlength(bs);
2436 if (length < 0) {
2437 ret = length;
2438 goto ro_cleanup;
2441 backing_length = bdrv_getlength(bs->backing->bs);
2442 if (backing_length < 0) {
2443 ret = backing_length;
2444 goto ro_cleanup;
2447 /* If our top snapshot is larger than the backing file image,
2448 * grow the backing file image if possible. If not possible,
2449 * we must return an error */
2450 if (length > backing_length) {
2451 ret = bdrv_truncate(bs->backing->bs, length);
2452 if (ret < 0) {
2453 goto ro_cleanup;
2457 total_sectors = length >> BDRV_SECTOR_BITS;
2459 /* qemu_try_blockalign() for bs will choose an alignment that works for
2460 * bs->backing->bs as well, so no need to compare the alignment manually. */
2461 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2462 if (buf == NULL) {
2463 ret = -ENOMEM;
2464 goto ro_cleanup;
2467 for (sector = 0; sector < total_sectors; sector += n) {
2468 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2469 if (ret < 0) {
2470 goto ro_cleanup;
2472 if (ret) {
2473 ret = bdrv_read(bs, sector, buf, n);
2474 if (ret < 0) {
2475 goto ro_cleanup;
2478 ret = bdrv_write(bs->backing->bs, sector, buf, n);
2479 if (ret < 0) {
2480 goto ro_cleanup;
2485 if (drv->bdrv_make_empty) {
2486 ret = drv->bdrv_make_empty(bs);
2487 if (ret < 0) {
2488 goto ro_cleanup;
2490 bdrv_flush(bs);
2494 * Make sure all data we wrote to the backing device is actually
2495 * stable on disk.
2497 if (bs->backing) {
2498 bdrv_flush(bs->backing->bs);
2501 ret = 0;
2502 ro_cleanup:
2503 qemu_vfree(buf);
2505 if (ro) {
2506 /* ignoring error return here */
2507 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2510 return ret;
2513 int bdrv_commit_all(void)
2515 BlockDriverState *bs;
2517 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2518 AioContext *aio_context = bdrv_get_aio_context(bs);
2520 aio_context_acquire(aio_context);
2521 if (bs->drv && bs->backing) {
2522 int ret = bdrv_commit(bs);
2523 if (ret < 0) {
2524 aio_context_release(aio_context);
2525 return ret;
2528 aio_context_release(aio_context);
2530 return 0;
2534 * Return values:
2535 * 0 - success
2536 * -EINVAL - backing format specified, but no file
2537 * -ENOSPC - can't update the backing file because no space is left in the
2538 * image file header
2539 * -ENOTSUP - format driver doesn't support changing the backing file
2541 int bdrv_change_backing_file(BlockDriverState *bs,
2542 const char *backing_file, const char *backing_fmt)
2544 BlockDriver *drv = bs->drv;
2545 int ret;
2547 /* Backing file format doesn't make sense without a backing file */
2548 if (backing_fmt && !backing_file) {
2549 return -EINVAL;
2552 if (drv->bdrv_change_backing_file != NULL) {
2553 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2554 } else {
2555 ret = -ENOTSUP;
2558 if (ret == 0) {
2559 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2560 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2562 return ret;
2566 * Finds the image layer in the chain that has 'bs' as its backing file.
2568 * active is the current topmost image.
2570 * Returns NULL if bs is not found in active's image chain,
2571 * or if active == bs.
2573 * Returns the bottommost base image if bs == NULL.
2575 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2576 BlockDriverState *bs)
2578 while (active && bs != backing_bs(active)) {
2579 active = backing_bs(active);
2582 return active;
2585 /* Given a BDS, searches for the base layer. */
2586 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2588 return bdrv_find_overlay(bs, NULL);
2592 * Drops images above 'base' up to and including 'top', and sets the image
2593 * above 'top' to have base as its backing file.
2595 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2596 * information in 'bs' can be properly updated.
2598 * E.g., this will convert the following chain:
2599 * bottom <- base <- intermediate <- top <- active
2601 * to
2603 * bottom <- base <- active
2605 * It is allowed for bottom==base, in which case it converts:
2607 * base <- intermediate <- top <- active
2609 * to
2611 * base <- active
2613 * If backing_file_str is non-NULL, it will be used when modifying top's
2614 * overlay image metadata.
2616 * Error conditions:
2617 * if active == top, that is considered an error
2620 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2621 BlockDriverState *base, const char *backing_file_str)
2623 BlockDriverState *new_top_bs = NULL;
2624 int ret = -EIO;
2626 if (!top->drv || !base->drv) {
2627 goto exit;
2630 new_top_bs = bdrv_find_overlay(active, top);
2632 if (new_top_bs == NULL) {
2633 /* we could not find the image above 'top', this is an error */
2634 goto exit;
2637 /* special case of new_top_bs->backing->bs already pointing to base - nothing
2638 * to do, no intermediate images */
2639 if (backing_bs(new_top_bs) == base) {
2640 ret = 0;
2641 goto exit;
2644 /* Make sure that base is in the backing chain of top */
2645 if (!bdrv_chain_contains(top, base)) {
2646 goto exit;
2649 /* success - we can delete the intermediate states, and link top->base */
2650 backing_file_str = backing_file_str ? backing_file_str : base->filename;
2651 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2652 base->drv ? base->drv->format_name : "");
2653 if (ret) {
2654 goto exit;
2656 bdrv_set_backing_hd(new_top_bs, base);
2658 ret = 0;
2659 exit:
2660 return ret;
2664 * Truncate file to 'offset' bytes (needed only for file protocols)
2666 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2668 BlockDriver *drv = bs->drv;
2669 int ret;
2670 if (!drv)
2671 return -ENOMEDIUM;
2672 if (!drv->bdrv_truncate)
2673 return -ENOTSUP;
2674 if (bs->read_only)
2675 return -EACCES;
2677 ret = drv->bdrv_truncate(bs, offset);
2678 if (ret == 0) {
2679 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2680 bdrv_dirty_bitmap_truncate(bs);
2681 if (bs->blk) {
2682 blk_dev_resize_cb(bs->blk);
2685 return ret;
2689 * Length of a allocated file in bytes. Sparse files are counted by actual
2690 * allocated space. Return < 0 if error or unknown.
2692 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2694 BlockDriver *drv = bs->drv;
2695 if (!drv) {
2696 return -ENOMEDIUM;
2698 if (drv->bdrv_get_allocated_file_size) {
2699 return drv->bdrv_get_allocated_file_size(bs);
2701 if (bs->file) {
2702 return bdrv_get_allocated_file_size(bs->file->bs);
2704 return -ENOTSUP;
2708 * Return number of sectors on success, -errno on error.
2710 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2712 BlockDriver *drv = bs->drv;
2714 if (!drv)
2715 return -ENOMEDIUM;
2717 if (drv->has_variable_length) {
2718 int ret = refresh_total_sectors(bs, bs->total_sectors);
2719 if (ret < 0) {
2720 return ret;
2723 return bs->total_sectors;
2727 * Return length in bytes on success, -errno on error.
2728 * The length is always a multiple of BDRV_SECTOR_SIZE.
2730 int64_t bdrv_getlength(BlockDriverState *bs)
2732 int64_t ret = bdrv_nb_sectors(bs);
2734 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2735 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2738 /* return 0 as number of sectors if no device present or error */
2739 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2741 int64_t nb_sectors = bdrv_nb_sectors(bs);
2743 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2746 int bdrv_is_read_only(BlockDriverState *bs)
2748 return bs->read_only;
2751 int bdrv_is_sg(BlockDriverState *bs)
2753 return bs->sg;
2756 int bdrv_enable_write_cache(BlockDriverState *bs)
2758 return bs->enable_write_cache;
2761 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2763 bs->enable_write_cache = wce;
2765 /* so a reopen() will preserve wce */
2766 if (wce) {
2767 bs->open_flags |= BDRV_O_CACHE_WB;
2768 } else {
2769 bs->open_flags &= ~BDRV_O_CACHE_WB;
2773 int bdrv_is_encrypted(BlockDriverState *bs)
2775 if (bs->backing && bs->backing->bs->encrypted) {
2776 return 1;
2778 return bs->encrypted;
2781 int bdrv_key_required(BlockDriverState *bs)
2783 BdrvChild *backing = bs->backing;
2785 if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2786 return 1;
2788 return (bs->encrypted && !bs->valid_key);
2791 int bdrv_set_key(BlockDriverState *bs, const char *key)
2793 int ret;
2794 if (bs->backing && bs->backing->bs->encrypted) {
2795 ret = bdrv_set_key(bs->backing->bs, key);
2796 if (ret < 0)
2797 return ret;
2798 if (!bs->encrypted)
2799 return 0;
2801 if (!bs->encrypted) {
2802 return -EINVAL;
2803 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2804 return -ENOMEDIUM;
2806 ret = bs->drv->bdrv_set_key(bs, key);
2807 if (ret < 0) {
2808 bs->valid_key = 0;
2809 } else if (!bs->valid_key) {
2810 bs->valid_key = 1;
2811 if (bs->blk) {
2812 /* call the change callback now, we skipped it on open */
2813 blk_dev_change_media_cb(bs->blk, true);
2816 return ret;
2820 * Provide an encryption key for @bs.
2821 * If @key is non-null:
2822 * If @bs is not encrypted, fail.
2823 * Else if the key is invalid, fail.
2824 * Else set @bs's key to @key, replacing the existing key, if any.
2825 * If @key is null:
2826 * If @bs is encrypted and still lacks a key, fail.
2827 * Else do nothing.
2828 * On failure, store an error object through @errp if non-null.
2830 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2832 if (key) {
2833 if (!bdrv_is_encrypted(bs)) {
2834 error_setg(errp, "Node '%s' is not encrypted",
2835 bdrv_get_device_or_node_name(bs));
2836 } else if (bdrv_set_key(bs, key) < 0) {
2837 error_setg(errp, QERR_INVALID_PASSWORD);
2839 } else {
2840 if (bdrv_key_required(bs)) {
2841 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2842 "'%s' (%s) is encrypted",
2843 bdrv_get_device_or_node_name(bs),
2844 bdrv_get_encrypted_filename(bs));
2849 const char *bdrv_get_format_name(BlockDriverState *bs)
2851 return bs->drv ? bs->drv->format_name : NULL;
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes pointers to the array elements, so @a and @b each point
 * to a 'const char *' and must be dereferenced once before they can be
 * compared with strcmp().
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2859 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2860 void *opaque)
2862 BlockDriver *drv;
2863 int count = 0;
2864 int i;
2865 const char **formats = NULL;
2867 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2868 if (drv->format_name) {
2869 bool found = false;
2870 int i = count;
2871 while (formats && i && !found) {
2872 found = !strcmp(formats[--i], drv->format_name);
2875 if (!found) {
2876 formats = g_renew(const char *, formats, count + 1);
2877 formats[count++] = drv->format_name;
2882 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2884 for (i = 0; i < count; i++) {
2885 it(opaque, formats[i]);
2888 g_free(formats);
2891 /* This function is to find a node in the bs graph */
2892 BlockDriverState *bdrv_find_node(const char *node_name)
2894 BlockDriverState *bs;
2896 assert(node_name);
2898 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2899 if (!strcmp(node_name, bs->node_name)) {
2900 return bs;
2903 return NULL;
2906 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2907 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2909 BlockDeviceInfoList *list, *entry;
2910 BlockDriverState *bs;
2912 list = NULL;
2913 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2914 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2915 if (!info) {
2916 qapi_free_BlockDeviceInfoList(list);
2917 return NULL;
2919 entry = g_malloc0(sizeof(*entry));
2920 entry->value = info;
2921 entry->next = list;
2922 list = entry;
2925 return list;
2928 BlockDriverState *bdrv_lookup_bs(const char *device,
2929 const char *node_name,
2930 Error **errp)
2932 BlockBackend *blk;
2933 BlockDriverState *bs;
2935 if (device) {
2936 blk = blk_by_name(device);
2938 if (blk) {
2939 bs = blk_bs(blk);
2940 if (!bs) {
2941 error_setg(errp, "Device '%s' has no medium", device);
2944 return bs;
2948 if (node_name) {
2949 bs = bdrv_find_node(node_name);
2951 if (bs) {
2952 return bs;
2956 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2957 device ? device : "",
2958 node_name ? node_name : "");
2959 return NULL;
2962 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2963 * return false. If either argument is NULL, return false. */
2964 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2966 while (top && top != base) {
2967 top = backing_bs(top);
2970 return top != NULL;
2973 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2975 if (!bs) {
2976 return QTAILQ_FIRST(&graph_bdrv_states);
2978 return QTAILQ_NEXT(bs, node_list);
2981 BlockDriverState *bdrv_next(BlockDriverState *bs)
2983 if (!bs) {
2984 return QTAILQ_FIRST(&bdrv_states);
2986 return QTAILQ_NEXT(bs, device_list);
2989 const char *bdrv_get_node_name(const BlockDriverState *bs)
2991 return bs->node_name;
2994 /* TODO check what callers really want: bs->node_name or blk_name() */
2995 const char *bdrv_get_device_name(const BlockDriverState *bs)
2997 return bs->blk ? blk_name(bs->blk) : "";
3000 /* This can be used to identify nodes that might not have a device
3001 * name associated. Since node and device names live in the same
3002 * namespace, the result is unambiguous. The exception is if both are
3003 * absent, then this returns an empty (non-null) string. */
3004 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
3006 return bs->blk ? blk_name(bs->blk) : bs->node_name;
3009 int bdrv_get_flags(BlockDriverState *bs)
3011 return bs->open_flags;
3014 int bdrv_has_zero_init_1(BlockDriverState *bs)
3016 return 1;
3019 int bdrv_has_zero_init(BlockDriverState *bs)
3021 assert(bs->drv);
3023 /* If BS is a copy on write image, it is initialized to
3024 the contents of the base image, which may not be zeroes. */
3025 if (bs->backing) {
3026 return 0;
3028 if (bs->drv->bdrv_has_zero_init) {
3029 return bs->drv->bdrv_has_zero_init(bs);
3032 /* safe default */
3033 return 0;
3036 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3038 BlockDriverInfo bdi;
3040 if (bs->backing) {
3041 return false;
3044 if (bdrv_get_info(bs, &bdi) == 0) {
3045 return bdi.unallocated_blocks_are_zero;
3048 return false;
3051 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3053 BlockDriverInfo bdi;
3055 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3056 return false;
3059 if (bdrv_get_info(bs, &bdi) == 0) {
3060 return bdi.can_write_zeroes_with_unmap;
3063 return false;
3066 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3068 if (bs->backing && bs->backing->bs->encrypted)
3069 return bs->backing_file;
3070 else if (bs->encrypted)
3071 return bs->filename;
3072 else
3073 return NULL;
3076 void bdrv_get_backing_filename(BlockDriverState *bs,
3077 char *filename, int filename_size)
3079 pstrcpy(filename, filename_size, bs->backing_file);
3082 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3084 BlockDriver *drv = bs->drv;
3085 if (!drv)
3086 return -ENOMEDIUM;
3087 if (!drv->bdrv_get_info)
3088 return -ENOTSUP;
3089 memset(bdi, 0, sizeof(*bdi));
3090 return drv->bdrv_get_info(bs, bdi);
3093 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3095 BlockDriver *drv = bs->drv;
3096 if (drv && drv->bdrv_get_specific_info) {
3097 return drv->bdrv_get_specific_info(bs);
3099 return NULL;
3102 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3104 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3105 return;
3108 bs->drv->bdrv_debug_event(bs, event);
3111 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3112 const char *tag)
3114 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3115 bs = bs->file ? bs->file->bs : NULL;
3118 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3119 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3122 return -ENOTSUP;
3125 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3127 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3128 bs = bs->file ? bs->file->bs : NULL;
3131 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3132 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3135 return -ENOTSUP;
3138 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3140 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3141 bs = bs->file ? bs->file->bs : NULL;
3144 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3145 return bs->drv->bdrv_debug_resume(bs, tag);
3148 return -ENOTSUP;
3151 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3153 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3154 bs = bs->file ? bs->file->bs : NULL;
3157 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3158 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3161 return false;
3164 int bdrv_is_snapshot(BlockDriverState *bs)
3166 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3169 /* backing_file can either be relative, or absolute, or a protocol. If it is
3170 * relative, it must be relative to the chain. So, passing in bs->filename
3171 * from a BDS as backing_file should not be done, as that may be relative to
3172 * the CWD rather than the chain. */
3173 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3174 const char *backing_file)
3176 char *filename_full = NULL;
3177 char *backing_file_full = NULL;
3178 char *filename_tmp = NULL;
3179 int is_protocol = 0;
3180 BlockDriverState *curr_bs = NULL;
3181 BlockDriverState *retval = NULL;
3183 if (!bs || !bs->drv || !backing_file) {
3184 return NULL;
3187 filename_full = g_malloc(PATH_MAX);
3188 backing_file_full = g_malloc(PATH_MAX);
3189 filename_tmp = g_malloc(PATH_MAX);
3191 is_protocol = path_has_protocol(backing_file);
3193 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3195 /* If either of the filename paths is actually a protocol, then
3196 * compare unmodified paths; otherwise make paths relative */
3197 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3198 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3199 retval = curr_bs->backing->bs;
3200 break;
3202 } else {
3203 /* If not an absolute filename path, make it relative to the current
3204 * image's filename path */
3205 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3206 backing_file);
3208 /* We are going to compare absolute pathnames */
3209 if (!realpath(filename_tmp, filename_full)) {
3210 continue;
3213 /* We need to make sure the backing filename we are comparing against
3214 * is relative to the current image filename (or absolute) */
3215 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3216 curr_bs->backing_file);
3218 if (!realpath(filename_tmp, backing_file_full)) {
3219 continue;
3222 if (strcmp(backing_file_full, filename_full) == 0) {
3223 retval = curr_bs->backing->bs;
3224 break;
3229 g_free(filename_full);
3230 g_free(backing_file_full);
3231 g_free(filename_tmp);
3232 return retval;
3235 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3237 if (!bs->drv) {
3238 return 0;
3241 if (!bs->backing) {
3242 return 0;
3245 return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3248 void bdrv_init(void)
3250 module_call_init(MODULE_INIT_BLOCK);
3253 void bdrv_init_with_whitelist(void)
3255 use_bdrv_whitelist = 1;
3256 bdrv_init();
3259 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3261 Error *local_err = NULL;
3262 int ret;
3264 if (!bs->drv) {
3265 return;
3268 if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3269 return;
3271 bs->open_flags &= ~BDRV_O_INACTIVE;
3273 if (bs->drv->bdrv_invalidate_cache) {
3274 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3275 } else if (bs->file) {
3276 bdrv_invalidate_cache(bs->file->bs, &local_err);
3278 if (local_err) {
3279 bs->open_flags |= BDRV_O_INACTIVE;
3280 error_propagate(errp, local_err);
3281 return;
3284 ret = refresh_total_sectors(bs, bs->total_sectors);
3285 if (ret < 0) {
3286 bs->open_flags |= BDRV_O_INACTIVE;
3287 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3288 return;
3292 void bdrv_invalidate_cache_all(Error **errp)
3294 BlockDriverState *bs;
3295 Error *local_err = NULL;
3297 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3298 AioContext *aio_context = bdrv_get_aio_context(bs);
3300 aio_context_acquire(aio_context);
3301 bdrv_invalidate_cache(bs, &local_err);
3302 aio_context_release(aio_context);
3303 if (local_err) {
3304 error_propagate(errp, local_err);
3305 return;
3310 static int bdrv_inactivate(BlockDriverState *bs)
3312 int ret;
3314 if (bs->drv->bdrv_inactivate) {
3315 ret = bs->drv->bdrv_inactivate(bs);
3316 if (ret < 0) {
3317 return ret;
3321 bs->open_flags |= BDRV_O_INACTIVE;
3322 return 0;
3325 int bdrv_inactivate_all(void)
3327 BlockDriverState *bs;
3328 int ret;
3330 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3331 AioContext *aio_context = bdrv_get_aio_context(bs);
3333 aio_context_acquire(aio_context);
3334 ret = bdrv_inactivate(bs);
3335 aio_context_release(aio_context);
3336 if (ret < 0) {
3337 return ret;
3341 return 0;
3344 /**************************************************************/
3345 /* removable device support */
3348 * Return TRUE if the media is present
3350 bool bdrv_is_inserted(BlockDriverState *bs)
3352 BlockDriver *drv = bs->drv;
3353 BdrvChild *child;
3355 if (!drv) {
3356 return false;
3358 if (drv->bdrv_is_inserted) {
3359 return drv->bdrv_is_inserted(bs);
3361 QLIST_FOREACH(child, &bs->children, next) {
3362 if (!bdrv_is_inserted(child->bs)) {
3363 return false;
3366 return true;
3370 * Return whether the media changed since the last call to this
3371 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3373 int bdrv_media_changed(BlockDriverState *bs)
3375 BlockDriver *drv = bs->drv;
3377 if (drv && drv->bdrv_media_changed) {
3378 return drv->bdrv_media_changed(bs);
3380 return -ENOTSUP;
3384 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3386 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3388 BlockDriver *drv = bs->drv;
3389 const char *device_name;
3391 if (drv && drv->bdrv_eject) {
3392 drv->bdrv_eject(bs, eject_flag);
3395 device_name = bdrv_get_device_name(bs);
3396 if (device_name[0] != '\0') {
3397 qapi_event_send_device_tray_moved(device_name,
3398 eject_flag, &error_abort);
3403 * Lock or unlock the media (if it is locked, the user won't be able
3404 * to eject it manually).
3406 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3408 BlockDriver *drv = bs->drv;
3410 trace_bdrv_lock_medium(bs, locked);
3412 if (drv && drv->bdrv_lock_medium) {
3413 drv->bdrv_lock_medium(bs, locked);
3417 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3419 BdrvDirtyBitmap *bm;
3421 assert(name);
3422 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3423 if (bm->name && !strcmp(name, bm->name)) {
3424 return bm;
3427 return NULL;
3430 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3432 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3433 g_free(bitmap->name);
3434 bitmap->name = NULL;
3437 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3438 uint32_t granularity,
3439 const char *name,
3440 Error **errp)
3442 int64_t bitmap_size;
3443 BdrvDirtyBitmap *bitmap;
3444 uint32_t sector_granularity;
3446 assert((granularity & (granularity - 1)) == 0);
3448 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3449 error_setg(errp, "Bitmap already exists: %s", name);
3450 return NULL;
3452 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3453 assert(sector_granularity);
3454 bitmap_size = bdrv_nb_sectors(bs);
3455 if (bitmap_size < 0) {
3456 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3457 errno = -bitmap_size;
3458 return NULL;
3460 bitmap = g_new0(BdrvDirtyBitmap, 1);
3461 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3462 bitmap->size = bitmap_size;
3463 bitmap->name = g_strdup(name);
3464 bitmap->disabled = false;
3465 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3466 return bitmap;
3469 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3471 return bitmap->successor;
3474 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3476 return !(bitmap->disabled || bitmap->successor);
3479 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3481 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3482 return DIRTY_BITMAP_STATUS_FROZEN;
3483 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3484 return DIRTY_BITMAP_STATUS_DISABLED;
3485 } else {
3486 return DIRTY_BITMAP_STATUS_ACTIVE;
3491 * Create a successor bitmap destined to replace this bitmap after an operation.
3492 * Requires that the bitmap is not frozen and has no successor.
3494 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3495 BdrvDirtyBitmap *bitmap, Error **errp)
3497 uint64_t granularity;
3498 BdrvDirtyBitmap *child;
3500 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3501 error_setg(errp, "Cannot create a successor for a bitmap that is "
3502 "currently frozen");
3503 return -1;
3505 assert(!bitmap->successor);
3507 /* Create an anonymous successor */
3508 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3509 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3510 if (!child) {
3511 return -1;
3514 /* Successor will be on or off based on our current state. */
3515 child->disabled = bitmap->disabled;
3517 /* Install the successor and freeze the parent */
3518 bitmap->successor = child;
3519 return 0;
3523 * For a bitmap with a successor, yield our name to the successor,
3524 * delete the old bitmap, and return a handle to the new bitmap.
3526 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3527 BdrvDirtyBitmap *bitmap,
3528 Error **errp)
3530 char *name;
3531 BdrvDirtyBitmap *successor = bitmap->successor;
3533 if (successor == NULL) {
3534 error_setg(errp, "Cannot relinquish control if "
3535 "there's no successor present");
3536 return NULL;
3539 name = bitmap->name;
3540 bitmap->name = NULL;
3541 successor->name = name;
3542 bitmap->successor = NULL;
3543 bdrv_release_dirty_bitmap(bs, bitmap);
3545 return successor;
3549 * In cases of failure where we can no longer safely delete the parent,
3550 * we may wish to re-join the parent and child/successor.
3551 * The merged parent will be un-frozen, but not explicitly re-enabled.
3553 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3554 BdrvDirtyBitmap *parent,
3555 Error **errp)
3557 BdrvDirtyBitmap *successor = parent->successor;
3559 if (!successor) {
3560 error_setg(errp, "Cannot reclaim a successor when none is present");
3561 return NULL;
3564 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3565 error_setg(errp, "Merging of parent and successor bitmap failed");
3566 return NULL;
3568 bdrv_release_dirty_bitmap(bs, successor);
3569 parent->successor = NULL;
3571 return parent;
3575 * Truncates _all_ bitmaps attached to a BDS.
3577 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3579 BdrvDirtyBitmap *bitmap;
3580 uint64_t size = bdrv_nb_sectors(bs);
3582 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3583 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3584 hbitmap_truncate(bitmap->bitmap, size);
3585 bitmap->size = size;
3589 static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
3590 BdrvDirtyBitmap *bitmap,
3591 bool only_named)
3593 BdrvDirtyBitmap *bm, *next;
3594 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3595 if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
3596 assert(!bdrv_dirty_bitmap_frozen(bm));
3597 QLIST_REMOVE(bm, list);
3598 hbitmap_free(bm->bitmap);
3599 g_free(bm->name);
3600 g_free(bm);
3602 if (bitmap) {
3603 return;
3609 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3611 bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
3615 * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
3616 * There must not be any frozen bitmaps attached.
3618 static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
3620 bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
3623 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3625 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3626 bitmap->disabled = true;
3629 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3631 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3632 bitmap->disabled = false;
3635 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3637 BdrvDirtyBitmap *bm;
3638 BlockDirtyInfoList *list = NULL;
3639 BlockDirtyInfoList **plist = &list;
3641 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3642 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3643 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3644 info->count = bdrv_get_dirty_count(bm);
3645 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3646 info->has_name = !!bm->name;
3647 info->name = g_strdup(bm->name);
3648 info->status = bdrv_dirty_bitmap_status(bm);
3649 entry->value = info;
3650 *plist = entry;
3651 plist = &entry->next;
3654 return list;
3657 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3659 if (bitmap) {
3660 return hbitmap_get(bitmap->bitmap, sector);
3661 } else {
3662 return 0;
3667 * Chooses a default granularity based on the existing cluster size,
3668 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3669 * is no cluster size information available.
3671 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3673 BlockDriverInfo bdi;
3674 uint32_t granularity;
3676 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3677 granularity = MAX(4096, bdi.cluster_size);
3678 granularity = MIN(65536, granularity);
3679 } else {
3680 granularity = 65536;
3683 return granularity;
3686 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3688 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3691 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3693 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3696 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3697 int64_t cur_sector, int nr_sectors)
3699 assert(bdrv_dirty_bitmap_enabled(bitmap));
3700 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3703 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3704 int64_t cur_sector, int nr_sectors)
3706 assert(bdrv_dirty_bitmap_enabled(bitmap));
3707 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3710 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
3712 assert(bdrv_dirty_bitmap_enabled(bitmap));
3713 if (!out) {
3714 hbitmap_reset_all(bitmap->bitmap);
3715 } else {
3716 HBitmap *backup = bitmap->bitmap;
3717 bitmap->bitmap = hbitmap_alloc(bitmap->size,
3718 hbitmap_granularity(backup));
3719 *out = backup;
3723 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
3725 HBitmap *tmp = bitmap->bitmap;
3726 assert(bdrv_dirty_bitmap_enabled(bitmap));
3727 bitmap->bitmap = in;
3728 hbitmap_free(tmp);
3731 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3732 int nr_sectors)
3734 BdrvDirtyBitmap *bitmap;
3735 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3736 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3737 continue;
3739 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3744 * Advance an HBitmapIter to an arbitrary offset.
3746 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3748 assert(hbi->hb);
3749 hbitmap_iter_init(hbi, hbi->hb, offset);
3752 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3754 return hbitmap_count(bitmap->bitmap);
3757 /* Get a reference to bs */
3758 void bdrv_ref(BlockDriverState *bs)
3760 bs->refcnt++;
3763 /* Release a previously grabbed reference to bs.
3764 * If after releasing, reference count is zero, the BlockDriverState is
3765 * deleted. */
3766 void bdrv_unref(BlockDriverState *bs)
3768 if (!bs) {
3769 return;
3771 assert(bs->refcnt > 0);
3772 if (--bs->refcnt == 0) {
3773 bdrv_delete(bs);
3777 struct BdrvOpBlocker {
3778 Error *reason;
3779 QLIST_ENTRY(BdrvOpBlocker) list;
3782 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3784 BdrvOpBlocker *blocker;
3785 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3786 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3787 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3788 if (errp) {
3789 *errp = error_copy(blocker->reason);
3790 error_prepend(errp, "Node '%s' is busy: ",
3791 bdrv_get_device_or_node_name(bs));
3793 return true;
3795 return false;
3798 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3800 BdrvOpBlocker *blocker;
3801 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3803 blocker = g_new0(BdrvOpBlocker, 1);
3804 blocker->reason = reason;
3805 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3808 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3810 BdrvOpBlocker *blocker, *next;
3811 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3812 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3813 if (blocker->reason == reason) {
3814 QLIST_REMOVE(blocker, list);
3815 g_free(blocker);
3820 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3822 int i;
3823 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3824 bdrv_op_block(bs, i, reason);
3828 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3830 int i;
3831 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3832 bdrv_op_unblock(bs, i, reason);
3836 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3838 int i;
3840 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3841 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3842 return false;
3845 return true;
3848 void bdrv_img_create(const char *filename, const char *fmt,
3849 const char *base_filename, const char *base_fmt,
3850 char *options, uint64_t img_size, int flags,
3851 Error **errp, bool quiet)
3853 QemuOptsList *create_opts = NULL;
3854 QemuOpts *opts = NULL;
3855 const char *backing_fmt, *backing_file;
3856 int64_t size;
3857 BlockDriver *drv, *proto_drv;
3858 Error *local_err = NULL;
3859 int ret = 0;
3861 /* Find driver and parse its options */
3862 drv = bdrv_find_format(fmt);
3863 if (!drv) {
3864 error_setg(errp, "Unknown file format '%s'", fmt);
3865 return;
3868 proto_drv = bdrv_find_protocol(filename, true, errp);
3869 if (!proto_drv) {
3870 return;
3873 if (!drv->create_opts) {
3874 error_setg(errp, "Format driver '%s' does not support image creation",
3875 drv->format_name);
3876 return;
3879 if (!proto_drv->create_opts) {
3880 error_setg(errp, "Protocol driver '%s' does not support image creation",
3881 proto_drv->format_name);
3882 return;
3885 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3886 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3888 /* Create parameter list with default values */
3889 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3890 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3892 /* Parse -o options */
3893 if (options) {
3894 qemu_opts_do_parse(opts, options, NULL, &local_err);
3895 if (local_err) {
3896 error_report_err(local_err);
3897 local_err = NULL;
3898 error_setg(errp, "Invalid options for file format '%s'", fmt);
3899 goto out;
3903 if (base_filename) {
3904 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3905 if (local_err) {
3906 error_setg(errp, "Backing file not supported for file format '%s'",
3907 fmt);
3908 goto out;
3912 if (base_fmt) {
3913 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3914 if (local_err) {
3915 error_setg(errp, "Backing file format not supported for file "
3916 "format '%s'", fmt);
3917 goto out;
3921 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3922 if (backing_file) {
3923 if (!strcmp(filename, backing_file)) {
3924 error_setg(errp, "Error: Trying to create an image with the "
3925 "same filename as the backing file");
3926 goto out;
3930 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3932 // The size for the image must always be specified, with one exception:
3933 // If we are using a backing file, we can obtain the size from there
3934 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3935 if (size == -1) {
3936 if (backing_file) {
3937 BlockDriverState *bs;
3938 char *full_backing = g_new0(char, PATH_MAX);
3939 int64_t size;
3940 int back_flags;
3941 QDict *backing_options = NULL;
3943 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3944 full_backing, PATH_MAX,
3945 &local_err);
3946 if (local_err) {
3947 g_free(full_backing);
3948 goto out;
3951 /* backing files always opened read-only */
3952 back_flags =
3953 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3955 if (backing_fmt) {
3956 backing_options = qdict_new();
3957 qdict_put(backing_options, "driver",
3958 qstring_from_str(backing_fmt));
3961 bs = NULL;
3962 ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3963 back_flags, &local_err);
3964 g_free(full_backing);
3965 if (ret < 0) {
3966 goto out;
3968 size = bdrv_getlength(bs);
3969 if (size < 0) {
3970 error_setg_errno(errp, -size, "Could not get size of '%s'",
3971 backing_file);
3972 bdrv_unref(bs);
3973 goto out;
3976 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3978 bdrv_unref(bs);
3979 } else {
3980 error_setg(errp, "Image creation needs a size parameter");
3981 goto out;
3985 if (!quiet) {
3986 printf("Formatting '%s', fmt=%s ", filename, fmt);
3987 qemu_opts_print(opts, " ");
3988 puts("");
3991 ret = bdrv_create(drv, filename, opts, &local_err);
3993 if (ret == -EFBIG) {
3994 /* This is generally a better message than whatever the driver would
3995 * deliver (especially because of the cluster_size_hint), since that
3996 * is most probably not much different from "image too large". */
3997 const char *cluster_size_hint = "";
3998 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3999 cluster_size_hint = " (try using a larger cluster size)";
4001 error_setg(errp, "The image size is too large for file format '%s'"
4002 "%s", fmt, cluster_size_hint);
4003 error_free(local_err);
4004 local_err = NULL;
4007 out:
4008 qemu_opts_del(opts);
4009 qemu_opts_free(create_opts);
4010 if (local_err) {
4011 error_propagate(errp, local_err);
4015 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
4017 return bs->aio_context;
4020 void bdrv_detach_aio_context(BlockDriverState *bs)
4022 BdrvAioNotifier *baf;
4024 if (!bs->drv) {
4025 return;
4028 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
4029 baf->detach_aio_context(baf->opaque);
4032 if (bs->throttle_state) {
4033 throttle_timers_detach_aio_context(&bs->throttle_timers);
4035 if (bs->drv->bdrv_detach_aio_context) {
4036 bs->drv->bdrv_detach_aio_context(bs);
4038 if (bs->file) {
4039 bdrv_detach_aio_context(bs->file->bs);
4041 if (bs->backing) {
4042 bdrv_detach_aio_context(bs->backing->bs);
4045 bs->aio_context = NULL;
4048 void bdrv_attach_aio_context(BlockDriverState *bs,
4049 AioContext *new_context)
4051 BdrvAioNotifier *ban;
4053 if (!bs->drv) {
4054 return;
4057 bs->aio_context = new_context;
4059 if (bs->backing) {
4060 bdrv_attach_aio_context(bs->backing->bs, new_context);
4062 if (bs->file) {
4063 bdrv_attach_aio_context(bs->file->bs, new_context);
4065 if (bs->drv->bdrv_attach_aio_context) {
4066 bs->drv->bdrv_attach_aio_context(bs, new_context);
4068 if (bs->throttle_state) {
4069 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
4072 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
4073 ban->attached_aio_context(new_context, ban->opaque);
4077 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
4079 bdrv_drain(bs); /* ensure there are no in-flight requests */
4081 bdrv_detach_aio_context(bs);
4083 /* This function executes in the old AioContext so acquire the new one in
4084 * case it runs in a different thread.
4086 aio_context_acquire(new_context);
4087 bdrv_attach_aio_context(bs, new_context);
4088 aio_context_release(new_context);
4091 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
4092 void (*attached_aio_context)(AioContext *new_context, void *opaque),
4093 void (*detach_aio_context)(void *opaque), void *opaque)
4095 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
4096 *ban = (BdrvAioNotifier){
4097 .attached_aio_context = attached_aio_context,
4098 .detach_aio_context = detach_aio_context,
4099 .opaque = opaque
4102 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
4105 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
4106 void (*attached_aio_context)(AioContext *,
4107 void *),
4108 void (*detach_aio_context)(void *),
4109 void *opaque)
4111 BdrvAioNotifier *ban, *ban_next;
4113 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4114 if (ban->attached_aio_context == attached_aio_context &&
4115 ban->detach_aio_context == detach_aio_context &&
4116 ban->opaque == opaque)
4118 QLIST_REMOVE(ban, list);
4119 g_free(ban);
4121 return;
4125 abort();
4128 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
4129 BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
4131 if (!bs->drv->bdrv_amend_options) {
4132 return -ENOTSUP;
4134 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
4137 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4138 * of block filter and by bdrv_is_first_non_filter.
4139 * It is used to test if the given bs is the candidate or recurse more in the
4140 * node graph.
4142 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4143 BlockDriverState *candidate)
4145 /* return false if basic checks fails */
4146 if (!bs || !bs->drv) {
4147 return false;
4150 /* the code reached a non block filter driver -> check if the bs is
4151 * the same as the candidate. It's the recursion termination condition.
4153 if (!bs->drv->is_filter) {
4154 return bs == candidate;
4156 /* Down this path the driver is a block filter driver */
4158 /* If the block filter recursion method is defined use it to recurse down
4159 * the node graph.
4161 if (bs->drv->bdrv_recurse_is_first_non_filter) {
4162 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4165 /* the driver is a block filter but don't allow to recurse -> return false
4167 return false;
4170 /* This function checks if the candidate is the first non filter bs down it's
4171 * bs chain. Since we don't have pointers to parents it explore all bs chains
4172 * from the top. Some filters can choose not to pass down the recursion.
4174 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4176 BlockDriverState *bs;
4178 /* walk down the bs forest recursively */
4179 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4180 bool perm;
4182 /* try to recurse in this top level bs */
4183 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4185 /* candidate is the first non filter */
4186 if (perm) {
4187 return true;
4191 return false;
4194 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
4195 const char *node_name, Error **errp)
4197 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4198 AioContext *aio_context;
4200 if (!to_replace_bs) {
4201 error_setg(errp, "Node name '%s' not found", node_name);
4202 return NULL;
4205 aio_context = bdrv_get_aio_context(to_replace_bs);
4206 aio_context_acquire(aio_context);
4208 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4209 to_replace_bs = NULL;
4210 goto out;
4213 /* We don't want arbitrary node of the BDS chain to be replaced only the top
4214 * most non filter in order to prevent data corruption.
4215 * Another benefit is that this tests exclude backing files which are
4216 * blocked by the backing blockers.
4218 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4219 error_setg(errp, "Only top most non filter can be replaced");
4220 to_replace_bs = NULL;
4221 goto out;
4224 out:
4225 aio_context_release(aio_context);
4226 return to_replace_bs;
4229 static bool append_open_options(QDict *d, BlockDriverState *bs)
4231 const QDictEntry *entry;
4232 QemuOptDesc *desc;
4233 BdrvChild *child;
4234 bool found_any = false;
4235 const char *p;
4237 for (entry = qdict_first(bs->options); entry;
4238 entry = qdict_next(bs->options, entry))
4240 /* Exclude options for children */
4241 QLIST_FOREACH(child, &bs->children, next) {
4242 if (strstart(qdict_entry_key(entry), child->name, &p)
4243 && (!*p || *p == '.'))
4245 break;
4248 if (child) {
4249 continue;
4252 /* And exclude all non-driver-specific options */
4253 for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
4254 if (!strcmp(qdict_entry_key(entry), desc->name)) {
4255 break;
4258 if (desc->name) {
4259 continue;
4262 qobject_incref(qdict_entry_value(entry));
4263 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4264 found_any = true;
4267 return found_any;
4270 /* Updates the following BDS fields:
4271 * - exact_filename: A filename which may be used for opening a block device
4272 * which (mostly) equals the given BDS (even without any
4273 * other options; so reading and writing must return the same
4274 * results, but caching etc. may be different)
4275 * - full_open_options: Options which, when given when opening a block device
4276 * (without a filename), result in a BDS (mostly)
4277 * equalling the given one
4278 * - filename: If exact_filename is set, it is copied here. Otherwise,
4279 * full_open_options is converted to a JSON object, prefixed with
4280 * "json:" (for use through the JSON pseudo protocol) and put here.
4282 void bdrv_refresh_filename(BlockDriverState *bs)
4284 BlockDriver *drv = bs->drv;
4285 QDict *opts;
4287 if (!drv) {
4288 return;
4291 /* This BDS's file name will most probably depend on its file's name, so
4292 * refresh that first */
4293 if (bs->file) {
4294 bdrv_refresh_filename(bs->file->bs);
4297 if (drv->bdrv_refresh_filename) {
4298 /* Obsolete information is of no use here, so drop the old file name
4299 * information before refreshing it */
4300 bs->exact_filename[0] = '\0';
4301 if (bs->full_open_options) {
4302 QDECREF(bs->full_open_options);
4303 bs->full_open_options = NULL;
4306 opts = qdict_new();
4307 append_open_options(opts, bs);
4308 drv->bdrv_refresh_filename(bs, opts);
4309 QDECREF(opts);
4310 } else if (bs->file) {
4311 /* Try to reconstruct valid information from the underlying file */
4312 bool has_open_options;
4314 bs->exact_filename[0] = '\0';
4315 if (bs->full_open_options) {
4316 QDECREF(bs->full_open_options);
4317 bs->full_open_options = NULL;
4320 opts = qdict_new();
4321 has_open_options = append_open_options(opts, bs);
4323 /* If no specific options have been given for this BDS, the filename of
4324 * the underlying file should suffice for this one as well */
4325 if (bs->file->bs->exact_filename[0] && !has_open_options) {
4326 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4328 /* Reconstructing the full options QDict is simple for most format block
4329 * drivers, as long as the full options are known for the underlying
4330 * file BDS. The full options QDict of that file BDS should somehow
4331 * contain a representation of the filename, therefore the following
4332 * suffices without querying the (exact_)filename of this BDS. */
4333 if (bs->file->bs->full_open_options) {
4334 qdict_put_obj(opts, "driver",
4335 QOBJECT(qstring_from_str(drv->format_name)));
4336 QINCREF(bs->file->bs->full_open_options);
4337 qdict_put_obj(opts, "file",
4338 QOBJECT(bs->file->bs->full_open_options));
4340 bs->full_open_options = opts;
4341 } else {
4342 QDECREF(opts);
4344 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4345 /* There is no underlying file BDS (at least referenced by BDS.file),
4346 * so the full options QDict should be equal to the options given
4347 * specifically for this block device when it was opened (plus the
4348 * driver specification).
4349 * Because those options don't change, there is no need to update
4350 * full_open_options when it's already set. */
4352 opts = qdict_new();
4353 append_open_options(opts, bs);
4354 qdict_put_obj(opts, "driver",
4355 QOBJECT(qstring_from_str(drv->format_name)));
4357 if (bs->exact_filename[0]) {
4358 /* This may not work for all block protocol drivers (some may
4359 * require this filename to be parsed), but we have to find some
4360 * default solution here, so just include it. If some block driver
4361 * does not support pure options without any filename at all or
4362 * needs some special format of the options QDict, it needs to
4363 * implement the driver-specific bdrv_refresh_filename() function.
4365 qdict_put_obj(opts, "filename",
4366 QOBJECT(qstring_from_str(bs->exact_filename)));
4369 bs->full_open_options = opts;
4372 if (bs->exact_filename[0]) {
4373 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4374 } else if (bs->full_open_options) {
4375 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4376 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4377 qstring_get_str(json));
4378 QDECREF(json);