block: Remove BDS close notifier
[qemu.git] / block.c
blobf4312d94f7cbe1265415a061d22a2903a2e99d2a
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qbool.h"
33 #include "qapi/qmp/qjson.h"
34 #include "sysemu/block-backend.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/notify.h"
37 #include "qemu/coroutine.h"
38 #include "block/qapi.h"
39 #include "qmp-commands.h"
40 #include "qemu/timer.h"
41 #include "qapi-event.h"
42 #include "block/throttle-groups.h"
44 #ifdef CONFIG_BSD
45 #include <sys/types.h>
46 #include <sys/stat.h>
47 #include <sys/ioctl.h>
48 #include <sys/queue.h>
49 #ifndef __DragonFly__
50 #include <sys/disk.h>
51 #endif
52 #endif
54 #ifdef _WIN32
55 #include <windows.h>
56 #endif
58 /**
59 * A BdrvDirtyBitmap can be in three possible states:
60 * (1) successor is NULL and disabled is false: full r/w mode
61 * (2) successor is NULL and disabled is true: read only mode ("disabled")
62 * (3) successor is set: frozen mode.
63 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
64 * or enabled. A frozen bitmap can only abdicate() or reclaim().
66 struct BdrvDirtyBitmap {
67 HBitmap *bitmap; /* Dirty sector bitmap implementation */
68 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
69 char *name; /* Optional non-empty unique ID */
70 int64_t size; /* Size of the bitmap (Number of sectors) */
71 bool disabled; /* Bitmap is read-only */
72 QLIST_ENTRY(BdrvDirtyBitmap) list;
75 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
77 struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
79 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
80 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
82 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
83 QLIST_HEAD_INITIALIZER(bdrv_drivers);
85 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
86 const char *reference, QDict *options, int flags,
87 BlockDriverState *parent,
88 const BdrvChildRole *child_role, Error **errp);
90 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
91 static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
93 /* If non-zero, use only whitelisted block drivers */
94 static int use_bdrv_whitelist;
96 #ifdef _WIN32
/*
 * Return non-zero when @filename starts with an ASCII letter immediately
 * followed by ':' (e.g. "c:" or "D:\dir"), zero otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    /* filename[1] is only inspected once the first character is a letter */
    return (is_lower || is_upper) && filename[1] == ':';
}
/*
 * Return 1 when @filename names a Windows drive: either exactly a drive
 * prefix ("X:") with nothing after it, or a string starting with the device
 * prefix "\\.\" or "//./". Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* Bare "X:" with no trailing path component */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* Device namespace spelled with backslashes */
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }

    /* Device namespace spelled with forward slashes */
    return strstart(filename, "//./", NULL) ? 1 : 0;
}
114 #endif
116 size_t bdrv_opt_mem_align(BlockDriverState *bs)
118 if (!bs || !bs->drv) {
119 /* page size or 4k (hdd sector size) should be on the safe side */
120 return MAX(4096, getpagesize());
123 return bs->bl.opt_mem_alignment;
126 size_t bdrv_min_mem_align(BlockDriverState *bs)
128 if (!bs || !bs->drv) {
129 /* page size or 4k (hdd sector size) should be on the safe side */
130 return MAX(4096, getpagesize());
133 return bs->bl.min_mem_alignment;
/*
 * Check whether @path starts with "<protocol>:", i.e. whether a ':' appears
 * before any path separator. On Windows, drive names are never treated as
 * protocols and '\' also counts as a separator.
 *
 * Returns 1 if a protocol prefix is present, 0 otherwise.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char delimiters[] = ":/\\";

    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char delimiters[] = ":/";
#endif

    /* The first delimiter found (or the terminating NUL) decides */
    return path[strcspn(path, delimiters)] == ':';
}
/*
 * Return 1 if @path is absolute, 0 otherwise.
 *
 * A path is absolute when it begins with '/'. On Windows, a leading '\',
 * a drive prefix, or a device name such as "\\.\d:" also counts.
 */
int path_is_absolute(const char *path)
{
    const char first = path[0];

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    if (first == '\\') {
        return 1;
    }
#endif

    return first == '/';
}
/*
 * Build in @dest (capacity @dest_size bytes) the location of @filename.
 *
 * An absolute @filename is copied unchanged. Otherwise the leading part of
 * @base_path -- everything up to and including its last path separator, or
 * up to and including its first ':' if that comes later -- is copied and
 * @filename is appended to it. URLs are supported through the ':' handling.
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *cut;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (protocol part), if any */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last directory separator, if any */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* Keep the longer of the two prefixes, clamped to the buffer size */
    cut = (after_sep > after_colon) ? after_sep : after_colon;
    prefix_len = cut - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }

    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
211 void bdrv_get_full_backing_filename_from_filename(const char *backed,
212 const char *backing,
213 char *dest, size_t sz,
214 Error **errp)
216 if (backing[0] == '\0' || path_has_protocol(backing) ||
217 path_is_absolute(backing))
219 pstrcpy(dest, sz, backing);
220 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
221 error_setg(errp, "Cannot use relative backing file names for '%s'",
222 backed);
223 } else {
224 path_combine(dest, sz, backed, backing);
228 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
229 Error **errp)
231 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
233 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
234 dest, sz, errp);
237 void bdrv_register(BlockDriver *bdrv)
239 bdrv_setup_io_funcs(bdrv);
241 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
244 BlockDriverState *bdrv_new_root(void)
246 BlockDriverState *bs = bdrv_new();
248 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
249 return bs;
252 BlockDriverState *bdrv_new(void)
254 BlockDriverState *bs;
255 int i;
257 bs = g_new0(BlockDriverState, 1);
258 QLIST_INIT(&bs->dirty_bitmaps);
259 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
260 QLIST_INIT(&bs->op_blockers[i]);
262 notifier_with_return_list_init(&bs->before_write_notifiers);
263 qemu_co_queue_init(&bs->throttled_reqs[0]);
264 qemu_co_queue_init(&bs->throttled_reqs[1]);
265 bs->refcnt = 1;
266 bs->aio_context = qemu_get_aio_context();
268 return bs;
271 BlockDriver *bdrv_find_format(const char *format_name)
273 BlockDriver *drv1;
274 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
275 if (!strcmp(drv1->format_name, format_name)) {
276 return drv1;
279 return NULL;
282 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
284 static const char *whitelist_rw[] = {
285 CONFIG_BDRV_RW_WHITELIST
287 static const char *whitelist_ro[] = {
288 CONFIG_BDRV_RO_WHITELIST
290 const char **p;
292 if (!whitelist_rw[0] && !whitelist_ro[0]) {
293 return 1; /* no whitelist, anything goes */
296 for (p = whitelist_rw; *p; p++) {
297 if (!strcmp(drv->format_name, *p)) {
298 return 1;
301 if (read_only) {
302 for (p = whitelist_ro; *p; p++) {
303 if (!strcmp(drv->format_name, *p)) {
304 return 1;
308 return 0;
311 typedef struct CreateCo {
312 BlockDriver *drv;
313 char *filename;
314 QemuOpts *opts;
315 int ret;
316 Error *err;
317 } CreateCo;
319 static void coroutine_fn bdrv_create_co_entry(void *opaque)
321 Error *local_err = NULL;
322 int ret;
324 CreateCo *cco = opaque;
325 assert(cco->drv);
327 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
328 if (local_err) {
329 error_propagate(&cco->err, local_err);
331 cco->ret = ret;
334 int bdrv_create(BlockDriver *drv, const char* filename,
335 QemuOpts *opts, Error **errp)
337 int ret;
339 Coroutine *co;
340 CreateCo cco = {
341 .drv = drv,
342 .filename = g_strdup(filename),
343 .opts = opts,
344 .ret = NOT_DONE,
345 .err = NULL,
348 if (!drv->bdrv_create) {
349 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
350 ret = -ENOTSUP;
351 goto out;
354 if (qemu_in_coroutine()) {
355 /* Fast-path if already in coroutine context */
356 bdrv_create_co_entry(&cco);
357 } else {
358 co = qemu_coroutine_create(bdrv_create_co_entry);
359 qemu_coroutine_enter(co, &cco);
360 while (cco.ret == NOT_DONE) {
361 aio_poll(qemu_get_aio_context(), true);
365 ret = cco.ret;
366 if (ret < 0) {
367 if (cco.err) {
368 error_propagate(errp, cco.err);
369 } else {
370 error_setg_errno(errp, -ret, "Could not create image");
374 out:
375 g_free(cco.filename);
376 return ret;
379 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
381 BlockDriver *drv;
382 Error *local_err = NULL;
383 int ret;
385 drv = bdrv_find_protocol(filename, true, errp);
386 if (drv == NULL) {
387 return -ENOENT;
390 ret = bdrv_create(drv, filename, opts, &local_err);
391 if (local_err) {
392 error_propagate(errp, local_err);
394 return ret;
398 * Try to get @bs's logical and physical block size.
399 * On success, store them in @bsz struct and return 0.
400 * On failure return -errno.
401 * @bs must not be empty.
403 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
405 BlockDriver *drv = bs->drv;
407 if (drv && drv->bdrv_probe_blocksizes) {
408 return drv->bdrv_probe_blocksizes(bs, bsz);
411 return -ENOTSUP;
415 * Try to get @bs's geometry (cyls, heads, sectors).
416 * On success, store them in @geo struct and return 0.
417 * On failure return -errno.
418 * @bs must not be empty.
420 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
422 BlockDriver *drv = bs->drv;
424 if (drv && drv->bdrv_probe_geometry) {
425 return drv->bdrv_probe_geometry(bs, geo);
428 return -ENOTSUP;
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename receives the name of the created file; @size is the capacity of
 * that buffer in bytes. On POSIX hosts the file is created in $TMPDIR, or in
 * /var/tmp when TMPDIR is unset.
 *
 * Return 0 upon success, otherwise a negative errno value (on Windows the
 * negated GetLastError() code). -EOVERFLOW is returned when the generated
 * name does not fit into @filename.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    const char *tmpdir;

    /* A negative size would be converted to a huge size_t by snprintf() */
    if (size < 0) {
        return -EOVERFLOW;
    }

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno; report the close() failure */
        int close_errno = errno;

        unlink(filename);
        return -close_errno;
    }
    return 0;
#endif
}
468 * Detect host devices. By convention, /dev/cdrom[N] is always
469 * recognized as a host CDROM.
471 static BlockDriver *find_hdev_driver(const char *filename)
473 int score_max = 0, score;
474 BlockDriver *drv = NULL, *d;
476 QLIST_FOREACH(d, &bdrv_drivers, list) {
477 if (d->bdrv_probe_device) {
478 score = d->bdrv_probe_device(filename);
479 if (score > score_max) {
480 score_max = score;
481 drv = d;
486 return drv;
489 BlockDriver *bdrv_find_protocol(const char *filename,
490 bool allow_protocol_prefix,
491 Error **errp)
493 BlockDriver *drv1;
494 char protocol[128];
495 int len;
496 const char *p;
498 /* TODO Drivers without bdrv_file_open must be specified explicitly */
501 * XXX(hch): we really should not let host device detection
502 * override an explicit protocol specification, but moving this
503 * later breaks access to device names with colons in them.
504 * Thanks to the brain-dead persistent naming schemes on udev-
505 * based Linux systems those actually are quite common.
507 drv1 = find_hdev_driver(filename);
508 if (drv1) {
509 return drv1;
512 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
513 return &bdrv_file;
516 p = strchr(filename, ':');
517 assert(p != NULL);
518 len = p - filename;
519 if (len > sizeof(protocol) - 1)
520 len = sizeof(protocol) - 1;
521 memcpy(protocol, filename, len);
522 protocol[len] = '\0';
523 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
524 if (drv1->protocol_name &&
525 !strcmp(drv1->protocol_name, protocol)) {
526 return drv1;
530 error_setg(errp, "Unknown protocol '%s'", protocol);
531 return NULL;
535 * Guess image format by probing its contents.
536 * This is not a good idea when your image is raw (CVE-2008-2004), but
537 * we do it anyway for backward compatibility.
539 * @buf contains the image's first @buf_size bytes.
540 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
541 * but can be smaller if the image file is smaller)
542 * @filename is its filename.
544 * For all block drivers, call the bdrv_probe() method to get its
545 * probing score.
546 * Return the first block driver with the highest probing score.
548 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
549 const char *filename)
551 int score_max = 0, score;
552 BlockDriver *drv = NULL, *d;
554 QLIST_FOREACH(d, &bdrv_drivers, list) {
555 if (d->bdrv_probe) {
556 score = d->bdrv_probe(buf, buf_size, filename);
557 if (score > score_max) {
558 score_max = score;
559 drv = d;
564 return drv;
567 static int find_image_format(BlockDriverState *bs, const char *filename,
568 BlockDriver **pdrv, Error **errp)
570 BlockDriver *drv;
571 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
572 int ret = 0;
574 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
575 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
576 *pdrv = &bdrv_raw;
577 return ret;
580 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
581 if (ret < 0) {
582 error_setg_errno(errp, -ret, "Could not read image for determining its "
583 "format");
584 *pdrv = NULL;
585 return ret;
588 drv = bdrv_probe_all(buf, ret, filename);
589 if (!drv) {
590 error_setg(errp, "Could not determine image format: No compatible "
591 "driver found");
592 ret = -ENOENT;
594 *pdrv = drv;
595 return ret;
599 * Set the current 'total_sectors' value
600 * Return 0 on success, -errno on error.
602 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
604 BlockDriver *drv = bs->drv;
606 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
607 if (bdrv_is_sg(bs))
608 return 0;
610 /* query actual device if possible, otherwise just trust the hint */
611 if (drv->bdrv_getlength) {
612 int64_t length = drv->bdrv_getlength(bs);
613 if (length < 0) {
614 return length;
616 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
619 bs->total_sectors = hint;
620 return 0;
624 * Combines a QDict of new block driver @options with any missing options taken
625 * from @old_options, so that leaving out an option defaults to its old value.
627 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
628 QDict *old_options)
630 if (bs->drv && bs->drv->bdrv_join_options) {
631 bs->drv->bdrv_join_options(options, old_options);
632 } else {
633 qdict_join(options, old_options, false);
638 * Set open flags for a given discard mode
640 * Return 0 on success, -1 if the discard mode was invalid.
642 int bdrv_parse_discard_flags(const char *mode, int *flags)
644 *flags &= ~BDRV_O_UNMAP;
646 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
647 /* do nothing */
648 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
649 *flags |= BDRV_O_UNMAP;
650 } else {
651 return -1;
654 return 0;
658 * Set open flags for a given cache mode
660 * Return 0 on success, -1 if the cache mode was invalid.
662 int bdrv_parse_cache_flags(const char *mode, int *flags)
664 *flags &= ~BDRV_O_CACHE_MASK;
666 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
667 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
668 } else if (!strcmp(mode, "directsync")) {
669 *flags |= BDRV_O_NOCACHE;
670 } else if (!strcmp(mode, "writeback")) {
671 *flags |= BDRV_O_CACHE_WB;
672 } else if (!strcmp(mode, "unsafe")) {
673 *flags |= BDRV_O_CACHE_WB;
674 *flags |= BDRV_O_NO_FLUSH;
675 } else if (!strcmp(mode, "writethrough")) {
676 /* this is the default */
677 } else {
678 return -1;
681 return 0;
685 * Returns the flags that a temporary snapshot should get, based on the
686 * originally requested flags (the originally requested image will have flags
687 * like a backing file)
689 static int bdrv_temp_snapshot_flags(int flags)
691 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
695 * Returns the options and flags that bs->file should get if a protocol driver
696 * is expected, based on the given options and flags for the parent BDS
698 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
699 int parent_flags, QDict *parent_options)
701 int flags = parent_flags;
703 /* Enable protocol handling, disable format probing for bs->file */
704 flags |= BDRV_O_PROTOCOL;
706 /* If the cache mode isn't explicitly set, inherit direct and no-flush from
707 * the parent. */
708 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
709 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
711 /* Our block drivers take care to send flushes and respect unmap policy,
712 * so we can default to enable both on lower layers regardless of the
713 * corresponding parent options. */
714 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
715 flags |= BDRV_O_UNMAP;
717 /* Clear flags that only apply to the top layer */
718 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
720 *child_flags = flags;
723 const BdrvChildRole child_file = {
724 .inherit_options = bdrv_inherited_options,
728 * Returns the options and flags that bs->file should get if the use of formats
729 * (and not only protocols) is permitted for it, based on the given options and
730 * flags for the parent BDS
732 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
733 int parent_flags, QDict *parent_options)
735 child_file.inherit_options(child_flags, child_options,
736 parent_flags, parent_options);
738 *child_flags &= ~BDRV_O_PROTOCOL;
741 const BdrvChildRole child_format = {
742 .inherit_options = bdrv_inherited_fmt_options,
746 * Returns the options and flags that bs->backing should get, based on the
747 * given options and flags for the parent BDS
749 static void bdrv_backing_options(int *child_flags, QDict *child_options,
750 int parent_flags, QDict *parent_options)
752 int flags = parent_flags;
754 /* The cache mode is inherited unmodified for backing files */
755 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_WB);
756 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
757 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
759 /* backing files always opened read-only */
760 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
762 /* snapshot=on is handled on the top layer */
763 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
765 *child_flags = flags;
768 static const BdrvChildRole child_backing = {
769 .inherit_options = bdrv_backing_options,
772 static int bdrv_open_flags(BlockDriverState *bs, int flags)
774 int open_flags = flags | BDRV_O_CACHE_WB;
777 * Clear flags that are internal to the block layer before opening the
778 * image.
780 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
783 * Snapshots should be writable.
785 if (flags & BDRV_O_TEMPORARY) {
786 open_flags |= BDRV_O_RDWR;
789 return open_flags;
792 static void update_flags_from_options(int *flags, QemuOpts *opts)
794 *flags &= ~BDRV_O_CACHE_MASK;
796 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
797 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
798 *flags |= BDRV_O_CACHE_WB;
801 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
802 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
803 *flags |= BDRV_O_NO_FLUSH;
806 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
807 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
808 *flags |= BDRV_O_NOCACHE;
812 static void update_options_from_flags(QDict *options, int flags)
814 if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
815 qdict_put(options, BDRV_OPT_CACHE_WB,
816 qbool_from_bool(flags & BDRV_O_CACHE_WB));
818 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
819 qdict_put(options, BDRV_OPT_CACHE_DIRECT,
820 qbool_from_bool(flags & BDRV_O_NOCACHE));
822 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
823 qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
824 qbool_from_bool(flags & BDRV_O_NO_FLUSH));
828 static void bdrv_assign_node_name(BlockDriverState *bs,
829 const char *node_name,
830 Error **errp)
832 char *gen_node_name = NULL;
834 if (!node_name) {
835 node_name = gen_node_name = id_generate(ID_BLOCK);
836 } else if (!id_wellformed(node_name)) {
838 * Check for empty string or invalid characters, but not if it is
839 * generated (generated names use characters not available to the user)
841 error_setg(errp, "Invalid node name");
842 return;
845 /* takes care of avoiding namespaces collisions */
846 if (blk_by_name(node_name)) {
847 error_setg(errp, "node-name=%s is conflicting with a device id",
848 node_name);
849 goto out;
852 /* takes care of avoiding duplicates node names */
853 if (bdrv_find_node(node_name)) {
854 error_setg(errp, "Duplicate node name");
855 goto out;
858 /* copy node name into the bs and insert it into the graph list */
859 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
860 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
861 out:
862 g_free(gen_node_name);
865 static QemuOptsList bdrv_runtime_opts = {
866 .name = "bdrv_common",
867 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
868 .desc = {
870 .name = "node-name",
871 .type = QEMU_OPT_STRING,
872 .help = "Node name of the block device node",
875 .name = "driver",
876 .type = QEMU_OPT_STRING,
877 .help = "Block driver to use for the node",
880 .name = BDRV_OPT_CACHE_WB,
881 .type = QEMU_OPT_BOOL,
882 .help = "Enable writeback mode",
885 .name = BDRV_OPT_CACHE_DIRECT,
886 .type = QEMU_OPT_BOOL,
887 .help = "Bypass software writeback cache on the host",
890 .name = BDRV_OPT_CACHE_NO_FLUSH,
891 .type = QEMU_OPT_BOOL,
892 .help = "Ignore flush requests",
894 { /* end of list */ }
899 * Common part for opening disk images and files
901 * Removes all processed options from *options.
903 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
904 QDict *options, Error **errp)
906 int ret, open_flags;
907 const char *filename;
908 const char *driver_name = NULL;
909 const char *node_name = NULL;
910 QemuOpts *opts;
911 BlockDriver *drv;
912 Error *local_err = NULL;
914 assert(bs->file == NULL);
915 assert(options != NULL && bs->options != options);
917 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
918 qemu_opts_absorb_qdict(opts, options, &local_err);
919 if (local_err) {
920 error_propagate(errp, local_err);
921 ret = -EINVAL;
922 goto fail_opts;
925 driver_name = qemu_opt_get(opts, "driver");
926 drv = bdrv_find_format(driver_name);
927 assert(drv != NULL);
929 if (file != NULL) {
930 filename = file->bs->filename;
931 } else {
932 filename = qdict_get_try_str(options, "filename");
935 if (drv->bdrv_needs_filename && !filename) {
936 error_setg(errp, "The '%s' block driver requires a file name",
937 drv->format_name);
938 ret = -EINVAL;
939 goto fail_opts;
942 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
943 drv->format_name);
945 node_name = qemu_opt_get(opts, "node-name");
946 bdrv_assign_node_name(bs, node_name, &local_err);
947 if (local_err) {
948 error_propagate(errp, local_err);
949 ret = -EINVAL;
950 goto fail_opts;
953 bs->request_alignment = 512;
954 bs->zero_beyond_eof = true;
955 bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
957 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
958 error_setg(errp,
959 !bs->read_only && bdrv_is_whitelisted(drv, true)
960 ? "Driver '%s' can only be used for read-only devices"
961 : "Driver '%s' is not whitelisted",
962 drv->format_name);
963 ret = -ENOTSUP;
964 goto fail_opts;
967 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
968 if (bs->open_flags & BDRV_O_COPY_ON_READ) {
969 if (!bs->read_only) {
970 bdrv_enable_copy_on_read(bs);
971 } else {
972 error_setg(errp, "Can't use copy-on-read on read-only device");
973 ret = -EINVAL;
974 goto fail_opts;
978 if (filename != NULL) {
979 pstrcpy(bs->filename, sizeof(bs->filename), filename);
980 } else {
981 bs->filename[0] = '\0';
983 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
985 bs->drv = drv;
986 bs->opaque = g_malloc0(drv->instance_size);
988 /* Apply cache mode options */
989 update_flags_from_options(&bs->open_flags, opts);
990 bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);
992 /* Open the image, either directly or using a protocol */
993 open_flags = bdrv_open_flags(bs, bs->open_flags);
994 if (drv->bdrv_file_open) {
995 assert(file == NULL);
996 assert(!drv->bdrv_needs_filename || filename != NULL);
997 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
998 } else {
999 if (file == NULL) {
1000 error_setg(errp, "Can't use '%s' as a block driver for the "
1001 "protocol level", drv->format_name);
1002 ret = -EINVAL;
1003 goto free_and_fail;
1005 bs->file = file;
1006 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1009 if (ret < 0) {
1010 if (local_err) {
1011 error_propagate(errp, local_err);
1012 } else if (bs->filename[0]) {
1013 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1014 } else {
1015 error_setg_errno(errp, -ret, "Could not open image");
1017 goto free_and_fail;
1020 if (bs->encrypted) {
1021 error_report("Encrypted images are deprecated");
1022 error_printf("Support for them will be removed in a future release.\n"
1023 "You can use 'qemu-img convert' to convert your image"
1024 " to an unencrypted one.\n");
1027 ret = refresh_total_sectors(bs, bs->total_sectors);
1028 if (ret < 0) {
1029 error_setg_errno(errp, -ret, "Could not refresh total sector count");
1030 goto free_and_fail;
1033 bdrv_refresh_limits(bs, &local_err);
1034 if (local_err) {
1035 error_propagate(errp, local_err);
1036 ret = -EINVAL;
1037 goto free_and_fail;
1040 assert(bdrv_opt_mem_align(bs) != 0);
1041 assert(bdrv_min_mem_align(bs) != 0);
1042 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
1044 qemu_opts_del(opts);
1045 return 0;
1047 free_and_fail:
1048 bs->file = NULL;
1049 g_free(bs->opaque);
1050 bs->opaque = NULL;
1051 bs->drv = NULL;
1052 fail_opts:
1053 qemu_opts_del(opts);
1054 return ret;
1057 static QDict *parse_json_filename(const char *filename, Error **errp)
1059 QObject *options_obj;
1060 QDict *options;
1061 int ret;
1063 ret = strstart(filename, "json:", &filename);
1064 assert(ret);
1066 options_obj = qobject_from_json(filename);
1067 if (!options_obj) {
1068 error_setg(errp, "Could not parse the JSON options");
1069 return NULL;
1072 if (qobject_type(options_obj) != QTYPE_QDICT) {
1073 qobject_decref(options_obj);
1074 error_setg(errp, "Invalid JSON object given");
1075 return NULL;
1078 options = qobject_to_qdict(options_obj);
1079 qdict_flatten(options);
1081 return options;
1084 static void parse_json_protocol(QDict *options, const char **pfilename,
1085 Error **errp)
1087 QDict *json_options;
1088 Error *local_err = NULL;
1090 /* Parse json: pseudo-protocol */
1091 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1092 return;
1095 json_options = parse_json_filename(*pfilename, &local_err);
1096 if (local_err) {
1097 error_propagate(errp, local_err);
1098 return;
1101 /* Options given in the filename have lower priority than options
1102 * specified directly */
1103 qdict_join(options, json_options, false);
1104 QDECREF(json_options);
1105 *pfilename = NULL;
1109 * Fills in default options for opening images and converts the legacy
1110 * filename/flags pair to option QDict entries.
1111 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1112 * block driver has been specified explicitly.
1114 static int bdrv_fill_options(QDict **options, const char *filename,
1115 int *flags, Error **errp)
1117 const char *drvname;
1118 bool protocol = *flags & BDRV_O_PROTOCOL;
1119 bool parse_filename = false;
1120 BlockDriver *drv = NULL;
1121 Error *local_err = NULL;
1123 drvname = qdict_get_try_str(*options, "driver");
1124 if (drvname) {
1125 drv = bdrv_find_format(drvname);
1126 if (!drv) {
1127 error_setg(errp, "Unknown driver '%s'", drvname);
1128 return -ENOENT;
1130 /* If the user has explicitly specified the driver, this choice should
1131 * override the BDRV_O_PROTOCOL flag */
1132 protocol = drv->bdrv_file_open;
1135 if (protocol) {
1136 *flags |= BDRV_O_PROTOCOL;
1137 } else {
1138 *flags &= ~BDRV_O_PROTOCOL;
1141 /* Translate cache options from flags into options */
1142 update_options_from_flags(*options, *flags);
1144 /* Fetch the file name from the options QDict if necessary */
1145 if (protocol && filename) {
1146 if (!qdict_haskey(*options, "filename")) {
1147 qdict_put(*options, "filename", qstring_from_str(filename));
1148 parse_filename = true;
1149 } else {
1150 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1151 "the same time");
1152 return -EINVAL;
1156 /* Find the right block driver */
1157 filename = qdict_get_try_str(*options, "filename");
1159 if (!drvname && protocol) {
1160 if (filename) {
1161 drv = bdrv_find_protocol(filename, parse_filename, errp);
1162 if (!drv) {
1163 return -EINVAL;
1166 drvname = drv->format_name;
1167 qdict_put(*options, "driver", qstring_from_str(drvname));
1168 } else {
1169 error_setg(errp, "Must specify either driver or file");
1170 return -EINVAL;
1174 assert(drv || !protocol);
1176 /* Driver-specific filename parsing */
1177 if (drv && drv->bdrv_parse_filename && parse_filename) {
1178 drv->bdrv_parse_filename(filename, *options, &local_err);
1179 if (local_err) {
1180 error_propagate(errp, local_err);
1181 return -EINVAL;
1184 if (!drv->bdrv_needs_filename) {
1185 qdict_del(*options, "filename");
1189 if (runstate_check(RUN_STATE_INMIGRATE)) {
1190 *flags |= BDRV_O_INACTIVE;
1193 return 0;
1196 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1197 BlockDriverState *child_bs,
1198 const char *child_name,
1199 const BdrvChildRole *child_role)
1201 BdrvChild *child = g_new(BdrvChild, 1);
1202 *child = (BdrvChild) {
1203 .bs = child_bs,
1204 .name = g_strdup(child_name),
1205 .role = child_role,
1208 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1209 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1211 return child;
1214 static void bdrv_detach_child(BdrvChild *child)
1216 QLIST_REMOVE(child, next);
1217 QLIST_REMOVE(child, next_parent);
1218 g_free(child->name);
1219 g_free(child);
1222 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1224 BlockDriverState *child_bs;
1226 if (child == NULL) {
1227 return;
1230 if (child->bs->inherits_from == parent) {
1231 child->bs->inherits_from = NULL;
1234 child_bs = child->bs;
1235 bdrv_detach_child(child);
1236 bdrv_unref(child_bs);
1240 * Sets the backing file link of a BDS. A new reference is created; callers
1241 * which don't need their own reference any more must call bdrv_unref().
1243 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1245 if (backing_hd) {
1246 bdrv_ref(backing_hd);
1249 if (bs->backing) {
1250 assert(bs->backing_blocker);
1251 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1252 bdrv_unref_child(bs, bs->backing);
1253 } else if (backing_hd) {
1254 error_setg(&bs->backing_blocker,
1255 "node is used as backing hd of '%s'",
1256 bdrv_get_device_or_node_name(bs));
1259 if (!backing_hd) {
1260 error_free(bs->backing_blocker);
1261 bs->backing_blocker = NULL;
1262 bs->backing = NULL;
1263 goto out;
1265 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1266 bs->open_flags &= ~BDRV_O_NO_BACKING;
1267 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1268 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1269 backing_hd->drv ? backing_hd->drv->format_name : "");
1271 bdrv_op_block_all(backing_hd, bs->backing_blocker);
1272 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1273 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1274 bs->backing_blocker);
1275 out:
1276 bdrv_refresh_limits(bs, NULL);
1280 * Opens the backing file for a BlockDriverState if not yet open
1282 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1283 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1284 * itself, all options starting with "${bdref_key}." are considered part of the
1285 * BlockdevRef.
1287 * TODO Can this be unified with bdrv_open_image()?
1289 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1290 const char *bdref_key, Error **errp)
1292 char *backing_filename = g_malloc0(PATH_MAX);
1293 char *bdref_key_dot;
1294 const char *reference = NULL;
1295 int ret = 0;
1296 BlockDriverState *backing_hd;
1297 QDict *options;
1298 QDict *tmp_parent_options = NULL;
1299 Error *local_err = NULL;
1301 if (bs->backing != NULL) {
1302 goto free_exit;
1305 /* NULL means an empty set of options */
1306 if (parent_options == NULL) {
1307 tmp_parent_options = qdict_new();
1308 parent_options = tmp_parent_options;
1311 bs->open_flags &= ~BDRV_O_NO_BACKING;
1313 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1314 qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1315 g_free(bdref_key_dot);
1317 reference = qdict_get_try_str(parent_options, bdref_key);
1318 if (reference || qdict_haskey(options, "file.filename")) {
1319 backing_filename[0] = '\0';
1320 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1321 QDECREF(options);
1322 goto free_exit;
1323 } else {
1324 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1325 &local_err);
1326 if (local_err) {
1327 ret = -EINVAL;
1328 error_propagate(errp, local_err);
1329 QDECREF(options);
1330 goto free_exit;
1334 if (!bs->drv || !bs->drv->supports_backing) {
1335 ret = -EINVAL;
1336 error_setg(errp, "Driver doesn't support backing files");
1337 QDECREF(options);
1338 goto free_exit;
1341 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1342 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1345 backing_hd = NULL;
1346 ret = bdrv_open_inherit(&backing_hd,
1347 *backing_filename ? backing_filename : NULL,
1348 reference, options, 0, bs, &child_backing,
1349 errp);
1350 if (ret < 0) {
1351 bs->open_flags |= BDRV_O_NO_BACKING;
1352 error_prepend(errp, "Could not open backing file: ");
1353 goto free_exit;
1356 /* Hook up the backing file link; drop our reference, bs owns the
1357 * backing_hd reference now */
1358 bdrv_set_backing_hd(bs, backing_hd);
1359 bdrv_unref(backing_hd);
1361 qdict_del(parent_options, bdref_key);
1363 free_exit:
1364 g_free(backing_filename);
1365 QDECREF(tmp_parent_options);
1366 return ret;
1370 * Opens a disk image whose options are given as BlockdevRef in another block
1371 * device's options.
1373 * If allow_none is true, no image will be opened if filename is false and no
1374 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1376 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1377 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1378 * itself, all options starting with "${bdref_key}." are considered part of the
1379 * BlockdevRef.
1381 * The BlockdevRef will be removed from the options QDict.
1383 BdrvChild *bdrv_open_child(const char *filename,
1384 QDict *options, const char *bdref_key,
1385 BlockDriverState* parent,
1386 const BdrvChildRole *child_role,
1387 bool allow_none, Error **errp)
1389 BdrvChild *c = NULL;
1390 BlockDriverState *bs;
1391 QDict *image_options;
1392 int ret;
1393 char *bdref_key_dot;
1394 const char *reference;
1396 assert(child_role != NULL);
1398 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1399 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1400 g_free(bdref_key_dot);
1402 reference = qdict_get_try_str(options, bdref_key);
1403 if (!filename && !reference && !qdict_size(image_options)) {
1404 if (!allow_none) {
1405 error_setg(errp, "A block device must be specified for \"%s\"",
1406 bdref_key);
1408 QDECREF(image_options);
1409 goto done;
1412 bs = NULL;
1413 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1414 parent, child_role, errp);
1415 if (ret < 0) {
1416 goto done;
1419 c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1421 done:
1422 qdict_del(options, bdref_key);
1423 return c;
1426 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1428 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1429 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1430 int64_t total_size;
1431 QemuOpts *opts = NULL;
1432 QDict *snapshot_options;
1433 BlockDriverState *bs_snapshot;
1434 Error *local_err = NULL;
1435 int ret;
1437 /* if snapshot, we create a temporary backing file and open it
1438 instead of opening 'filename' directly */
1440 /* Get the required size from the image */
1441 total_size = bdrv_getlength(bs);
1442 if (total_size < 0) {
1443 ret = total_size;
1444 error_setg_errno(errp, -total_size, "Could not get image size");
1445 goto out;
1448 /* Create the temporary image */
1449 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1450 if (ret < 0) {
1451 error_setg_errno(errp, -ret, "Could not get temporary filename");
1452 goto out;
1455 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1456 &error_abort);
1457 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1458 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1459 qemu_opts_del(opts);
1460 if (ret < 0) {
1461 error_prepend(errp, "Could not create temporary overlay '%s': ",
1462 tmp_filename);
1463 goto out;
1466 /* Prepare a new options QDict for the temporary file */
1467 snapshot_options = qdict_new();
1468 qdict_put(snapshot_options, "file.driver",
1469 qstring_from_str("file"));
1470 qdict_put(snapshot_options, "file.filename",
1471 qstring_from_str(tmp_filename));
1472 qdict_put(snapshot_options, "driver",
1473 qstring_from_str("qcow2"));
1475 bs_snapshot = bdrv_new();
1477 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1478 flags, &local_err);
1479 if (ret < 0) {
1480 error_propagate(errp, local_err);
1481 goto out;
1484 bdrv_append(bs_snapshot, bs);
1486 out:
1487 g_free(tmp_filename);
1488 return ret;
1492 * Opens a disk image (raw, qcow2, vmdk, ...)
1494 * options is a QDict of options to pass to the block drivers, or NULL for an
1495 * empty set of options. The reference to the QDict belongs to the block layer
1496 * after the call (even on failure), so if the caller intends to reuse the
1497 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1499 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1500 * If it is not NULL, the referenced BDS will be reused.
1502 * The reference parameter may be used to specify an existing block device which
1503 * should be opened. If specified, neither options nor a filename may be given,
1504 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1506 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1507 const char *reference, QDict *options, int flags,
1508 BlockDriverState *parent,
1509 const BdrvChildRole *child_role, Error **errp)
1511 int ret;
1512 BdrvChild *file = NULL;
1513 BlockDriverState *bs;
1514 BlockDriver *drv = NULL;
1515 const char *drvname;
1516 const char *backing;
1517 Error *local_err = NULL;
1518 int snapshot_flags = 0;
1520 assert(pbs);
1521 assert(!child_role || !flags);
1522 assert(!child_role == !parent);
1524 if (reference) {
1525 bool options_non_empty = options ? qdict_size(options) : false;
1526 QDECREF(options);
1528 if (*pbs) {
1529 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1530 "another block device");
1531 return -EINVAL;
1534 if (filename || options_non_empty) {
1535 error_setg(errp, "Cannot reference an existing block device with "
1536 "additional options or a new filename");
1537 return -EINVAL;
1540 bs = bdrv_lookup_bs(reference, reference, errp);
1541 if (!bs) {
1542 return -ENODEV;
1544 bdrv_ref(bs);
1545 *pbs = bs;
1546 return 0;
1549 if (*pbs) {
1550 bs = *pbs;
1551 } else {
1552 bs = bdrv_new();
1555 /* NULL means an empty set of options */
1556 if (options == NULL) {
1557 options = qdict_new();
1560 /* json: syntax counts as explicit options, as if in the QDict */
1561 parse_json_protocol(options, &filename, &local_err);
1562 if (local_err) {
1563 ret = -EINVAL;
1564 goto fail;
1567 bs->explicit_options = qdict_clone_shallow(options);
1569 if (child_role) {
1570 bs->inherits_from = parent;
1571 child_role->inherit_options(&flags, options,
1572 parent->open_flags, parent->options);
1575 ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1576 if (local_err) {
1577 goto fail;
1580 bs->open_flags = flags;
1581 bs->options = options;
1582 options = qdict_clone_shallow(options);
1584 /* Find the right image format driver */
1585 drvname = qdict_get_try_str(options, "driver");
1586 if (drvname) {
1587 drv = bdrv_find_format(drvname);
1588 if (!drv) {
1589 error_setg(errp, "Unknown driver: '%s'", drvname);
1590 ret = -EINVAL;
1591 goto fail;
1595 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1597 backing = qdict_get_try_str(options, "backing");
1598 if (backing && *backing == '\0') {
1599 flags |= BDRV_O_NO_BACKING;
1600 qdict_del(options, "backing");
1603 /* Open image file without format layer */
1604 if ((flags & BDRV_O_PROTOCOL) == 0) {
1605 if (flags & BDRV_O_RDWR) {
1606 flags |= BDRV_O_ALLOW_RDWR;
1608 if (flags & BDRV_O_SNAPSHOT) {
1609 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1610 bdrv_backing_options(&flags, options, flags, options);
1613 bs->open_flags = flags;
1615 file = bdrv_open_child(filename, options, "file", bs,
1616 &child_file, true, &local_err);
1617 if (local_err) {
1618 ret = -EINVAL;
1619 goto fail;
1623 /* Image format probing */
1624 bs->probed = !drv;
1625 if (!drv && file) {
1626 ret = find_image_format(file->bs, filename, &drv, &local_err);
1627 if (ret < 0) {
1628 goto fail;
1631 * This option update would logically belong in bdrv_fill_options(),
1632 * but we first need to open bs->file for the probing to work, while
1633 * opening bs->file already requires the (mostly) final set of options
1634 * so that cache mode etc. can be inherited.
1636 * Adding the driver later is somewhat ugly, but it's not an option
1637 * that would ever be inherited, so it's correct. We just need to make
1638 * sure to update both bs->options (which has the full effective
1639 * options for bs) and options (which has file.* already removed).
1641 qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1642 qdict_put(options, "driver", qstring_from_str(drv->format_name));
1643 } else if (!drv) {
1644 error_setg(errp, "Must specify either driver or file");
1645 ret = -EINVAL;
1646 goto fail;
1649 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1650 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1651 /* file must be NULL if a protocol BDS is about to be created
1652 * (the inverse results in an error message from bdrv_open_common()) */
1653 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1655 /* Open the image */
1656 ret = bdrv_open_common(bs, file, options, &local_err);
1657 if (ret < 0) {
1658 goto fail;
1661 if (file && (bs->file != file)) {
1662 bdrv_unref_child(bs, file);
1663 file = NULL;
1666 /* If there is a backing file, use it */
1667 if ((flags & BDRV_O_NO_BACKING) == 0) {
1668 ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1669 if (ret < 0) {
1670 goto close_and_fail;
1674 bdrv_refresh_filename(bs);
1676 /* Check if any unknown options were used */
1677 if (options && (qdict_size(options) != 0)) {
1678 const QDictEntry *entry = qdict_first(options);
1679 if (flags & BDRV_O_PROTOCOL) {
1680 error_setg(errp, "Block protocol '%s' doesn't support the option "
1681 "'%s'", drv->format_name, entry->key);
1682 } else {
1683 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1684 "support the option '%s'", drv->format_name,
1685 bdrv_get_device_name(bs), entry->key);
1688 ret = -EINVAL;
1689 goto close_and_fail;
1692 if (!bdrv_key_required(bs)) {
1693 if (bs->blk) {
1694 blk_dev_change_media_cb(bs->blk, true);
1696 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1697 && !runstate_check(RUN_STATE_INMIGRATE)
1698 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1699 error_setg(errp,
1700 "Guest must be stopped for opening of encrypted image");
1701 ret = -EBUSY;
1702 goto close_and_fail;
1705 QDECREF(options);
1706 *pbs = bs;
1708 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1709 * temporary snapshot afterwards. */
1710 if (snapshot_flags) {
1711 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1712 if (local_err) {
1713 goto close_and_fail;
1717 return 0;
1719 fail:
1720 if (file != NULL) {
1721 bdrv_unref_child(bs, file);
1723 QDECREF(bs->explicit_options);
1724 QDECREF(bs->options);
1725 QDECREF(options);
1726 bs->options = NULL;
1727 if (!*pbs) {
1728 /* If *pbs is NULL, a new BDS has been created in this function and
1729 needs to be freed now. Otherwise, it does not need to be closed,
1730 since it has not really been opened yet. */
1731 bdrv_unref(bs);
1733 if (local_err) {
1734 error_propagate(errp, local_err);
1736 return ret;
1738 close_and_fail:
1739 /* See fail path, but now the BDS has to be always closed */
1740 if (*pbs) {
1741 bdrv_close(bs);
1742 } else {
1743 bdrv_unref(bs);
1745 QDECREF(options);
1746 if (local_err) {
1747 error_propagate(errp, local_err);
1749 return ret;
1752 int bdrv_open(BlockDriverState **pbs, const char *filename,
1753 const char *reference, QDict *options, int flags, Error **errp)
1755 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1756 NULL, errp);
/*
 * One element of a BlockReopenQueue: the staged reopen state of a single
 * BlockDriverState together with a flag that records whether the prepare
 * step has already succeeded for it.
 */
typedef struct BlockReopenQueueEntry {
     /* set by bdrv_reopen_multiple() once bdrv_reopen_prepare() succeeded */
     bool prepared;
     /* node to reopen plus its new flags and option dicts */
     BDRVReopenState state;
     /* linkage within the reopen queue */
     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
} BlockReopenQueueEntry;
1766 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1767 * reopen of multiple devices.
1769 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1770 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1771 * be created and initialized. This newly created BlockReopenQueue should be
1772 * passed back in for subsequent calls that are intended to be of the same
1773 * atomic 'set'.
1775 * bs is the BlockDriverState to add to the reopen queue.
1777 * options contains the changed options for the associated bs
1778 * (the BlockReopenQueue takes ownership)
1780 * flags contains the open flags for the associated bs
1782 * returns a pointer to bs_queue, which is either the newly allocated
1783 * bs_queue, or the existing bs_queue being used.
1786 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1787 BlockDriverState *bs,
1788 QDict *options,
1789 int flags,
1790 const BdrvChildRole *role,
1791 QDict *parent_options,
1792 int parent_flags)
1794 assert(bs != NULL);
1796 BlockReopenQueueEntry *bs_entry;
1797 BdrvChild *child;
1798 QDict *old_options, *explicit_options;
1800 if (bs_queue == NULL) {
1801 bs_queue = g_new0(BlockReopenQueue, 1);
1802 QSIMPLEQ_INIT(bs_queue);
1805 if (!options) {
1806 options = qdict_new();
1810 * Precedence of options:
1811 * 1. Explicitly passed in options (highest)
1812 * 2. Set in flags (only for top level)
1813 * 3. Retained from explicitly set options of bs
1814 * 4. Inherited from parent node
1815 * 5. Retained from effective options of bs
1818 if (!parent_options) {
1820 * Any setting represented by flags is always updated. If the
1821 * corresponding QDict option is set, it takes precedence. Otherwise
1822 * the flag is translated into a QDict option. The old setting of bs is
1823 * not considered.
1825 update_options_from_flags(options, flags);
1828 /* Old explicitly set values (don't overwrite by inherited value) */
1829 old_options = qdict_clone_shallow(bs->explicit_options);
1830 bdrv_join_options(bs, options, old_options);
1831 QDECREF(old_options);
1833 explicit_options = qdict_clone_shallow(options);
1835 /* Inherit from parent node */
1836 if (parent_options) {
1837 assert(!flags);
1838 role->inherit_options(&flags, options, parent_flags, parent_options);
1841 /* Old values are used for options that aren't set yet */
1842 old_options = qdict_clone_shallow(bs->options);
1843 bdrv_join_options(bs, options, old_options);
1844 QDECREF(old_options);
1846 /* bdrv_open() masks this flag out */
1847 flags &= ~BDRV_O_PROTOCOL;
1849 QLIST_FOREACH(child, &bs->children, next) {
1850 QDict *new_child_options;
1851 char *child_key_dot;
1853 /* reopen can only change the options of block devices that were
1854 * implicitly created and inherited options. For other (referenced)
1855 * block devices, a syntax like "backing.foo" results in an error. */
1856 if (child->bs->inherits_from != bs) {
1857 continue;
1860 child_key_dot = g_strdup_printf("%s.", child->name);
1861 qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1862 g_free(child_key_dot);
1864 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1865 child->role, options, flags);
1868 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1869 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1871 bs_entry->state.bs = bs;
1872 bs_entry->state.options = options;
1873 bs_entry->state.explicit_options = explicit_options;
1874 bs_entry->state.flags = flags;
1876 return bs_queue;
1879 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1880 BlockDriverState *bs,
1881 QDict *options, int flags)
1883 return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1884 NULL, NULL, 0);
1888 * Reopen multiple BlockDriverStates atomically & transactionally.
1890 * The queue passed in (bs_queue) must have been built up previous
1891 * via bdrv_reopen_queue().
1893 * Reopens all BDS specified in the queue, with the appropriate
1894 * flags. All devices are prepared for reopen, and failure of any
1895 * device will cause all device changes to be abandonded, and intermediate
1896 * data cleaned up.
1898 * If all devices prepare successfully, then the changes are committed
1899 * to all devices.
1902 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1904 int ret = -1;
1905 BlockReopenQueueEntry *bs_entry, *next;
1906 Error *local_err = NULL;
1908 assert(bs_queue != NULL);
1910 bdrv_drain_all();
1912 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1913 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1914 error_propagate(errp, local_err);
1915 goto cleanup;
1917 bs_entry->prepared = true;
1920 /* If we reach this point, we have success and just need to apply the
1921 * changes
1923 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1924 bdrv_reopen_commit(&bs_entry->state);
1927 ret = 0;
1929 cleanup:
1930 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1931 if (ret && bs_entry->prepared) {
1932 bdrv_reopen_abort(&bs_entry->state);
1933 } else if (ret) {
1934 QDECREF(bs_entry->state.explicit_options);
1936 QDECREF(bs_entry->state.options);
1937 g_free(bs_entry);
1939 g_free(bs_queue);
1940 return ret;
1944 /* Reopen a single BlockDriverState with the specified flags. */
1945 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1947 int ret = -1;
1948 Error *local_err = NULL;
1949 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1951 ret = bdrv_reopen_multiple(queue, &local_err);
1952 if (local_err != NULL) {
1953 error_propagate(errp, local_err);
1955 return ret;
1960 * Prepares a BlockDriverState for reopen. All changes are staged in the
1961 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1962 * the block driver layer .bdrv_reopen_prepare()
1964 * bs is the BlockDriverState to reopen
1965 * flags are the new open flags
1966 * queue is the reopen queue
1968 * Returns 0 on success, non-zero on error. On error errp will be set
1969 * as well.
1971 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1972 * It is the responsibility of the caller to then call the abort() or
1973 * commit() for any other BDS that have been left in a prepare() state
1976 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1977 Error **errp)
1979 int ret = -1;
1980 Error *local_err = NULL;
1981 BlockDriver *drv;
1982 QemuOpts *opts;
1983 const char *value;
1985 assert(reopen_state != NULL);
1986 assert(reopen_state->bs->drv != NULL);
1987 drv = reopen_state->bs->drv;
1989 /* Process generic block layer options */
1990 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1991 qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1992 if (local_err) {
1993 error_propagate(errp, local_err);
1994 ret = -EINVAL;
1995 goto error;
1998 update_flags_from_options(&reopen_state->flags, opts);
2000 /* If a guest device is attached, it owns WCE */
2001 if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2002 bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2003 bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2004 if (old_wce != new_wce) {
2005 error_setg(errp, "Cannot change cache.writeback: Device attached");
2006 ret = -EINVAL;
2007 goto error;
2011 /* node-name and driver must be unchanged. Put them back into the QDict, so
2012 * that they are checked at the end of this function. */
2013 value = qemu_opt_get(opts, "node-name");
2014 if (value) {
2015 qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2018 value = qemu_opt_get(opts, "driver");
2019 if (value) {
2020 qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2023 /* if we are to stay read-only, do not allow permission change
2024 * to r/w */
2025 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2026 reopen_state->flags & BDRV_O_RDWR) {
2027 error_setg(errp, "Node '%s' is read only",
2028 bdrv_get_device_or_node_name(reopen_state->bs));
2029 goto error;
2033 ret = bdrv_flush(reopen_state->bs);
2034 if (ret) {
2035 error_setg_errno(errp, -ret, "Error flushing drive");
2036 goto error;
2039 if (drv->bdrv_reopen_prepare) {
2040 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2041 if (ret) {
2042 if (local_err != NULL) {
2043 error_propagate(errp, local_err);
2044 } else {
2045 error_setg(errp, "failed while preparing to reopen image '%s'",
2046 reopen_state->bs->filename);
2048 goto error;
2050 } else {
2051 /* It is currently mandatory to have a bdrv_reopen_prepare()
2052 * handler for each supported drv. */
2053 error_setg(errp, "Block format '%s' used by node '%s' "
2054 "does not support reopening files", drv->format_name,
2055 bdrv_get_device_or_node_name(reopen_state->bs));
2056 ret = -1;
2057 goto error;
2060 /* Options that are not handled are only okay if they are unchanged
2061 * compared to the old state. It is expected that some options are only
2062 * used for the initial open, but not reopen (e.g. filename) */
2063 if (qdict_size(reopen_state->options)) {
2064 const QDictEntry *entry = qdict_first(reopen_state->options);
2066 do {
2067 QString *new_obj = qobject_to_qstring(entry->value);
2068 const char *new = qstring_get_str(new_obj);
2069 const char *old = qdict_get_try_str(reopen_state->bs->options,
2070 entry->key);
2072 if (!old || strcmp(new, old)) {
2073 error_setg(errp, "Cannot change the option '%s'", entry->key);
2074 ret = -EINVAL;
2075 goto error;
2077 } while ((entry = qdict_next(reopen_state->options, entry)));
2080 ret = 0;
2082 error:
2083 qemu_opts_del(opts);
2084 return ret;
2088 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2089 * makes them final by swapping the staging BlockDriverState contents into
2090 * the active BlockDriverState contents.
2092 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2094 BlockDriver *drv;
2096 assert(reopen_state != NULL);
2097 drv = reopen_state->bs->drv;
2098 assert(drv != NULL);
2100 /* If there are any driver level actions to take */
2101 if (drv->bdrv_reopen_commit) {
2102 drv->bdrv_reopen_commit(reopen_state);
2105 /* set BDS specific flags now */
2106 QDECREF(reopen_state->bs->explicit_options);
2108 reopen_state->bs->explicit_options = reopen_state->explicit_options;
2109 reopen_state->bs->open_flags = reopen_state->flags;
2110 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2111 BDRV_O_CACHE_WB);
2112 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2114 bdrv_refresh_limits(reopen_state->bs, NULL);
2118 * Abort the reopen, and delete and free the staged changes in
2119 * reopen_state
2121 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2123 BlockDriver *drv;
2125 assert(reopen_state != NULL);
2126 drv = reopen_state->bs->drv;
2127 assert(drv != NULL);
2129 if (drv->bdrv_reopen_abort) {
2130 drv->bdrv_reopen_abort(reopen_state);
2133 QDECREF(reopen_state->explicit_options);
2137 void bdrv_close(BlockDriverState *bs)
2139 BdrvAioNotifier *ban, *ban_next;
2141 if (bs->job) {
2142 block_job_cancel_sync(bs->job);
2145 /* Disable I/O limits and drain all pending throttled requests */
2146 if (bs->throttle_state) {
2147 bdrv_io_limits_disable(bs);
2150 bdrv_drained_begin(bs); /* complete I/O */
2151 bdrv_flush(bs);
2152 bdrv_drain(bs); /* in case flush left pending I/O */
2154 bdrv_release_named_dirty_bitmaps(bs);
2155 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2157 if (bs->blk) {
2158 blk_dev_change_media_cb(bs->blk, false);
2161 if (bs->drv) {
2162 BdrvChild *child, *next;
2164 bs->drv->bdrv_close(bs);
2165 bs->drv = NULL;
2167 bdrv_set_backing_hd(bs, NULL);
2169 if (bs->file != NULL) {
2170 bdrv_unref_child(bs, bs->file);
2171 bs->file = NULL;
2174 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2175 /* TODO Remove bdrv_unref() from drivers' close function and use
2176 * bdrv_unref_child() here */
2177 if (child->bs->inherits_from == bs) {
2178 child->bs->inherits_from = NULL;
2180 bdrv_detach_child(child);
2183 g_free(bs->opaque);
2184 bs->opaque = NULL;
2185 bs->copy_on_read = 0;
2186 bs->backing_file[0] = '\0';
2187 bs->backing_format[0] = '\0';
2188 bs->total_sectors = 0;
2189 bs->encrypted = 0;
2190 bs->valid_key = 0;
2191 bs->sg = 0;
2192 bs->zero_beyond_eof = false;
2193 QDECREF(bs->options);
2194 QDECREF(bs->explicit_options);
2195 bs->options = NULL;
2196 QDECREF(bs->full_open_options);
2197 bs->full_open_options = NULL;
2200 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2201 g_free(ban);
2203 QLIST_INIT(&bs->aio_notifiers);
2204 bdrv_drained_end(bs);
2207 void bdrv_close_all(void)
2209 BlockDriverState *bs;
2211 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2212 AioContext *aio_context = bdrv_get_aio_context(bs);
2214 aio_context_acquire(aio_context);
2215 bdrv_close(bs);
2216 aio_context_release(aio_context);
2220 /* make a BlockDriverState anonymous by removing from bdrv_state and
2221 * graph_bdrv_state list.
2222 Also, NULL terminate the device_name to prevent double remove */
2223 void bdrv_make_anon(BlockDriverState *bs)
2226 * Take care to remove bs from bdrv_states only when it's actually
2227 * in it. Note that bs->device_list.tqe_prev is initially null,
2228 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
2229 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2230 * resetting it to null on remove.
2232 if (bs->device_list.tqe_prev) {
2233 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2234 bs->device_list.tqe_prev = NULL;
2236 if (bs->node_name[0] != '\0') {
2237 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2239 bs->node_name[0] = '\0';
2242 /* Fields that need to stay with the top-level BDS */
2243 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2244 BlockDriverState *bs_src)
2246 /* move some fields that need to stay attached to the device */
2248 /* dev info */
2249 bs_dest->copy_on_read = bs_src->copy_on_read;
2251 bs_dest->enable_write_cache = bs_src->enable_write_cache;
2253 /* dirty bitmap */
2254 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
2257 static void change_parent_backing_link(BlockDriverState *from,
2258 BlockDriverState *to)
2260 BdrvChild *c, *next;
2262 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2263 assert(c->role != &child_backing);
2264 c->bs = to;
2265 QLIST_REMOVE(c, next_parent);
2266 QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2267 bdrv_ref(to);
2268 bdrv_unref(from);
2270 if (from->blk) {
2271 blk_set_bs(from->blk, to);
2272 if (!to->device_list.tqe_prev) {
2273 QTAILQ_INSERT_BEFORE(from, to, device_list);
2275 QTAILQ_REMOVE(&bdrv_states, from, device_list);
2279 static void swap_feature_fields(BlockDriverState *bs_top,
2280 BlockDriverState *bs_new)
2282 BlockDriverState tmp;
2284 bdrv_move_feature_fields(&tmp, bs_top);
2285 bdrv_move_feature_fields(bs_top, bs_new);
2286 bdrv_move_feature_fields(bs_new, &tmp);
2288 assert(!bs_new->throttle_state);
2289 if (bs_top->throttle_state) {
2290 assert(bs_top->io_limits_enabled);
2291 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2292 bdrv_io_limits_disable(bs_top);
2297 * Add new bs contents at the top of an image chain while the chain is
2298 * live, while keeping required fields on the top layer.
2300 * This will modify the BlockDriverState fields, and swap contents
2301 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2303 * bs_new must not be attached to a BlockBackend.
2305 * This function does not create any image files.
2307 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2308 * that's what the callers commonly need. bs_new will be referenced by the old
2309 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2310 * reference of its own, it must call bdrv_ref().
2312 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2314 assert(!bdrv_requests_pending(bs_top));
2315 assert(!bdrv_requests_pending(bs_new));
2317 bdrv_ref(bs_top);
2318 change_parent_backing_link(bs_top, bs_new);
2320 /* Some fields always stay on top of the backing file chain */
2321 swap_feature_fields(bs_top, bs_new);
2323 bdrv_set_backing_hd(bs_new, bs_top);
2324 bdrv_unref(bs_top);
2326 /* bs_new is now referenced by its new parents, we don't need the
2327 * additional reference any more. */
2328 bdrv_unref(bs_new);
2331 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2333 assert(!bdrv_requests_pending(old));
2334 assert(!bdrv_requests_pending(new));
2336 bdrv_ref(old);
2338 if (old->blk) {
2339 /* As long as these fields aren't in BlockBackend, but in the top-level
2340 * BlockDriverState, it's not possible for a BDS to have two BBs.
2342 * We really want to copy the fields from old to new, but we go for a
2343 * swap instead so that pointers aren't duplicated and cause trouble.
2344 * (Also, bdrv_swap() used to do the same.) */
2345 assert(!new->blk);
2346 swap_feature_fields(old, new);
2348 change_parent_backing_link(old, new);
2350 /* Change backing files if a previously independent node is added to the
2351 * chain. For active commit, we replace top by its own (indirect) backing
2352 * file and don't do anything here so we don't build a loop. */
2353 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2354 bdrv_set_backing_hd(new, backing_bs(old));
2355 bdrv_set_backing_hd(old, NULL);
2358 bdrv_unref(old);
2361 static void bdrv_delete(BlockDriverState *bs)
2363 assert(!bs->job);
2364 assert(bdrv_op_blocker_is_empty(bs));
2365 assert(!bs->refcnt);
2367 bdrv_close(bs);
2369 /* remove from list, if necessary */
2370 bdrv_make_anon(bs);
2372 g_free(bs);
2376 * Run consistency checks on an image
2378 * Returns 0 if the check could be completed (it doesn't mean that the image is
2379 * free of errors) or -errno when an internal error occurred. The results of the
2380 * check are stored in res.
2382 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2384 if (bs->drv == NULL) {
2385 return -ENOMEDIUM;
2387 if (bs->drv->bdrv_check == NULL) {
2388 return -ENOTSUP;
2391 memset(res, 0, sizeof(*res));
2392 return bs->drv->bdrv_check(bs, res, fix);
2395 #define COMMIT_BUF_SECTORS 2048
2397 /* commit COW file into the raw image */
2398 int bdrv_commit(BlockDriverState *bs)
2400 BlockDriver *drv = bs->drv;
2401 int64_t sector, total_sectors, length, backing_length;
2402 int n, ro, open_flags;
2403 int ret = 0;
2404 uint8_t *buf = NULL;
2406 if (!drv)
2407 return -ENOMEDIUM;
2409 if (!bs->backing) {
2410 return -ENOTSUP;
2413 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2414 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2415 return -EBUSY;
2418 ro = bs->backing->bs->read_only;
2419 open_flags = bs->backing->bs->open_flags;
2421 if (ro) {
2422 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2423 return -EACCES;
2427 length = bdrv_getlength(bs);
2428 if (length < 0) {
2429 ret = length;
2430 goto ro_cleanup;
2433 backing_length = bdrv_getlength(bs->backing->bs);
2434 if (backing_length < 0) {
2435 ret = backing_length;
2436 goto ro_cleanup;
2439 /* If our top snapshot is larger than the backing file image,
2440 * grow the backing file image if possible. If not possible,
2441 * we must return an error */
2442 if (length > backing_length) {
2443 ret = bdrv_truncate(bs->backing->bs, length);
2444 if (ret < 0) {
2445 goto ro_cleanup;
2449 total_sectors = length >> BDRV_SECTOR_BITS;
2451 /* qemu_try_blockalign() for bs will choose an alignment that works for
2452 * bs->backing->bs as well, so no need to compare the alignment manually. */
2453 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2454 if (buf == NULL) {
2455 ret = -ENOMEM;
2456 goto ro_cleanup;
2459 for (sector = 0; sector < total_sectors; sector += n) {
2460 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2461 if (ret < 0) {
2462 goto ro_cleanup;
2464 if (ret) {
2465 ret = bdrv_read(bs, sector, buf, n);
2466 if (ret < 0) {
2467 goto ro_cleanup;
2470 ret = bdrv_write(bs->backing->bs, sector, buf, n);
2471 if (ret < 0) {
2472 goto ro_cleanup;
2477 if (drv->bdrv_make_empty) {
2478 ret = drv->bdrv_make_empty(bs);
2479 if (ret < 0) {
2480 goto ro_cleanup;
2482 bdrv_flush(bs);
2486 * Make sure all data we wrote to the backing device is actually
2487 * stable on disk.
2489 if (bs->backing) {
2490 bdrv_flush(bs->backing->bs);
2493 ret = 0;
2494 ro_cleanup:
2495 qemu_vfree(buf);
2497 if (ro) {
2498 /* ignoring error return here */
2499 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2502 return ret;
2505 int bdrv_commit_all(void)
2507 BlockDriverState *bs;
2509 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2510 AioContext *aio_context = bdrv_get_aio_context(bs);
2512 aio_context_acquire(aio_context);
2513 if (bs->drv && bs->backing) {
2514 int ret = bdrv_commit(bs);
2515 if (ret < 0) {
2516 aio_context_release(aio_context);
2517 return ret;
2520 aio_context_release(aio_context);
2522 return 0;
2526 * Return values:
2527 * 0 - success
2528 * -EINVAL - backing format specified, but no file
2529 * -ENOSPC - can't update the backing file because no space is left in the
2530 * image file header
2531 * -ENOTSUP - format driver doesn't support changing the backing file
2533 int bdrv_change_backing_file(BlockDriverState *bs,
2534 const char *backing_file, const char *backing_fmt)
2536 BlockDriver *drv = bs->drv;
2537 int ret;
2539 /* Backing file format doesn't make sense without a backing file */
2540 if (backing_fmt && !backing_file) {
2541 return -EINVAL;
2544 if (drv->bdrv_change_backing_file != NULL) {
2545 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2546 } else {
2547 ret = -ENOTSUP;
2550 if (ret == 0) {
2551 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2552 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2554 return ret;
2558 * Finds the image layer in the chain that has 'bs' as its backing file.
2560 * active is the current topmost image.
2562 * Returns NULL if bs is not found in active's image chain,
2563 * or if active == bs.
2565 * Returns the bottommost base image if bs == NULL.
2567 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2568 BlockDriverState *bs)
2570 while (active && bs != backing_bs(active)) {
2571 active = backing_bs(active);
2574 return active;
2577 /* Given a BDS, searches for the base layer. */
2578 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2580 return bdrv_find_overlay(bs, NULL);
2584 * Drops images above 'base' up to and including 'top', and sets the image
2585 * above 'top' to have base as its backing file.
2587 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2588 * information in 'bs' can be properly updated.
2590 * E.g., this will convert the following chain:
2591 * bottom <- base <- intermediate <- top <- active
2593 * to
2595 * bottom <- base <- active
2597 * It is allowed for bottom==base, in which case it converts:
2599 * base <- intermediate <- top <- active
2601 * to
2603 * base <- active
2605 * If backing_file_str is non-NULL, it will be used when modifying top's
2606 * overlay image metadata.
2608 * Error conditions:
2609 * if active == top, that is considered an error
2612 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2613 BlockDriverState *base, const char *backing_file_str)
2615 BlockDriverState *new_top_bs = NULL;
2616 int ret = -EIO;
2618 if (!top->drv || !base->drv) {
2619 goto exit;
2622 new_top_bs = bdrv_find_overlay(active, top);
2624 if (new_top_bs == NULL) {
2625 /* we could not find the image above 'top', this is an error */
2626 goto exit;
2629 /* special case of new_top_bs->backing->bs already pointing to base - nothing
2630 * to do, no intermediate images */
2631 if (backing_bs(new_top_bs) == base) {
2632 ret = 0;
2633 goto exit;
2636 /* Make sure that base is in the backing chain of top */
2637 if (!bdrv_chain_contains(top, base)) {
2638 goto exit;
2641 /* success - we can delete the intermediate states, and link top->base */
2642 backing_file_str = backing_file_str ? backing_file_str : base->filename;
2643 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2644 base->drv ? base->drv->format_name : "");
2645 if (ret) {
2646 goto exit;
2648 bdrv_set_backing_hd(new_top_bs, base);
2650 ret = 0;
2651 exit:
2652 return ret;
2656 * Truncate file to 'offset' bytes (needed only for file protocols)
2658 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2660 BlockDriver *drv = bs->drv;
2661 int ret;
2662 if (!drv)
2663 return -ENOMEDIUM;
2664 if (!drv->bdrv_truncate)
2665 return -ENOTSUP;
2666 if (bs->read_only)
2667 return -EACCES;
2669 ret = drv->bdrv_truncate(bs, offset);
2670 if (ret == 0) {
2671 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2672 bdrv_dirty_bitmap_truncate(bs);
2673 if (bs->blk) {
2674 blk_dev_resize_cb(bs->blk);
2677 return ret;
2681 * Length of a allocated file in bytes. Sparse files are counted by actual
2682 * allocated space. Return < 0 if error or unknown.
2684 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2686 BlockDriver *drv = bs->drv;
2687 if (!drv) {
2688 return -ENOMEDIUM;
2690 if (drv->bdrv_get_allocated_file_size) {
2691 return drv->bdrv_get_allocated_file_size(bs);
2693 if (bs->file) {
2694 return bdrv_get_allocated_file_size(bs->file->bs);
2696 return -ENOTSUP;
2700 * Return number of sectors on success, -errno on error.
2702 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2704 BlockDriver *drv = bs->drv;
2706 if (!drv)
2707 return -ENOMEDIUM;
2709 if (drv->has_variable_length) {
2710 int ret = refresh_total_sectors(bs, bs->total_sectors);
2711 if (ret < 0) {
2712 return ret;
2715 return bs->total_sectors;
2719 * Return length in bytes on success, -errno on error.
2720 * The length is always a multiple of BDRV_SECTOR_SIZE.
2722 int64_t bdrv_getlength(BlockDriverState *bs)
2724 int64_t ret = bdrv_nb_sectors(bs);
2726 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2727 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2730 /* return 0 as number of sectors if no device present or error */
2731 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2733 int64_t nb_sectors = bdrv_nb_sectors(bs);
2735 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2738 int bdrv_is_read_only(BlockDriverState *bs)
2740 return bs->read_only;
2743 int bdrv_is_sg(BlockDriverState *bs)
2745 return bs->sg;
2748 int bdrv_enable_write_cache(BlockDriverState *bs)
2750 return bs->enable_write_cache;
2753 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2755 bs->enable_write_cache = wce;
2757 /* so a reopen() will preserve wce */
2758 if (wce) {
2759 bs->open_flags |= BDRV_O_CACHE_WB;
2760 } else {
2761 bs->open_flags &= ~BDRV_O_CACHE_WB;
2765 int bdrv_is_encrypted(BlockDriverState *bs)
2767 if (bs->backing && bs->backing->bs->encrypted) {
2768 return 1;
2770 return bs->encrypted;
2773 int bdrv_key_required(BlockDriverState *bs)
2775 BdrvChild *backing = bs->backing;
2777 if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2778 return 1;
2780 return (bs->encrypted && !bs->valid_key);
2783 int bdrv_set_key(BlockDriverState *bs, const char *key)
2785 int ret;
2786 if (bs->backing && bs->backing->bs->encrypted) {
2787 ret = bdrv_set_key(bs->backing->bs, key);
2788 if (ret < 0)
2789 return ret;
2790 if (!bs->encrypted)
2791 return 0;
2793 if (!bs->encrypted) {
2794 return -EINVAL;
2795 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2796 return -ENOMEDIUM;
2798 ret = bs->drv->bdrv_set_key(bs, key);
2799 if (ret < 0) {
2800 bs->valid_key = 0;
2801 } else if (!bs->valid_key) {
2802 bs->valid_key = 1;
2803 if (bs->blk) {
2804 /* call the change callback now, we skipped it on open */
2805 blk_dev_change_media_cb(bs->blk, true);
2808 return ret;
2812 * Provide an encryption key for @bs.
2813 * If @key is non-null:
2814 * If @bs is not encrypted, fail.
2815 * Else if the key is invalid, fail.
2816 * Else set @bs's key to @key, replacing the existing key, if any.
2817 * If @key is null:
2818 * If @bs is encrypted and still lacks a key, fail.
2819 * Else do nothing.
2820 * On failure, store an error object through @errp if non-null.
2822 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2824 if (key) {
2825 if (!bdrv_is_encrypted(bs)) {
2826 error_setg(errp, "Node '%s' is not encrypted",
2827 bdrv_get_device_or_node_name(bs));
2828 } else if (bdrv_set_key(bs, key) < 0) {
2829 error_setg(errp, QERR_INVALID_PASSWORD);
2831 } else {
2832 if (bdrv_key_required(bs)) {
2833 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2834 "'%s' (%s) is encrypted",
2835 bdrv_get_device_or_node_name(bs),
2836 bdrv_get_encrypted_filename(bs));
2841 const char *bdrv_get_format_name(BlockDriverState *bs)
2843 return bs->drv ? bs->drv->format_name : NULL;
/*
 * qsort() comparator for an array of C strings (elements of type
 * 'const char *').
 *
 * qsort() hands the comparator pointers to the array elements, not the
 * elements themselves, so each argument is a pointer to a string pointer
 * and must be dereferenced once before the strings can be compared.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2851 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2852 void *opaque)
2854 BlockDriver *drv;
2855 int count = 0;
2856 int i;
2857 const char **formats = NULL;
2859 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2860 if (drv->format_name) {
2861 bool found = false;
2862 int i = count;
2863 while (formats && i && !found) {
2864 found = !strcmp(formats[--i], drv->format_name);
2867 if (!found) {
2868 formats = g_renew(const char *, formats, count + 1);
2869 formats[count++] = drv->format_name;
2874 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2876 for (i = 0; i < count; i++) {
2877 it(opaque, formats[i]);
2880 g_free(formats);
2883 /* This function is to find a node in the bs graph */
2884 BlockDriverState *bdrv_find_node(const char *node_name)
2886 BlockDriverState *bs;
2888 assert(node_name);
2890 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2891 if (!strcmp(node_name, bs->node_name)) {
2892 return bs;
2895 return NULL;
2898 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2899 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2901 BlockDeviceInfoList *list, *entry;
2902 BlockDriverState *bs;
2904 list = NULL;
2905 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2906 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2907 if (!info) {
2908 qapi_free_BlockDeviceInfoList(list);
2909 return NULL;
2911 entry = g_malloc0(sizeof(*entry));
2912 entry->value = info;
2913 entry->next = list;
2914 list = entry;
2917 return list;
2920 BlockDriverState *bdrv_lookup_bs(const char *device,
2921 const char *node_name,
2922 Error **errp)
2924 BlockBackend *blk;
2925 BlockDriverState *bs;
2927 if (device) {
2928 blk = blk_by_name(device);
2930 if (blk) {
2931 bs = blk_bs(blk);
2932 if (!bs) {
2933 error_setg(errp, "Device '%s' has no medium", device);
2936 return bs;
2940 if (node_name) {
2941 bs = bdrv_find_node(node_name);
2943 if (bs) {
2944 return bs;
2948 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2949 device ? device : "",
2950 node_name ? node_name : "");
2951 return NULL;
2954 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2955 * return false. If either argument is NULL, return false. */
2956 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2958 while (top && top != base) {
2959 top = backing_bs(top);
2962 return top != NULL;
2965 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2967 if (!bs) {
2968 return QTAILQ_FIRST(&graph_bdrv_states);
2970 return QTAILQ_NEXT(bs, node_list);
2973 BlockDriverState *bdrv_next(BlockDriverState *bs)
2975 if (!bs) {
2976 return QTAILQ_FIRST(&bdrv_states);
2978 return QTAILQ_NEXT(bs, device_list);
2981 const char *bdrv_get_node_name(const BlockDriverState *bs)
2983 return bs->node_name;
2986 /* TODO check what callers really want: bs->node_name or blk_name() */
2987 const char *bdrv_get_device_name(const BlockDriverState *bs)
2989 return bs->blk ? blk_name(bs->blk) : "";
2992 /* This can be used to identify nodes that might not have a device
2993 * name associated. Since node and device names live in the same
2994 * namespace, the result is unambiguous. The exception is if both are
2995 * absent, then this returns an empty (non-null) string. */
2996 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
2998 return bs->blk ? blk_name(bs->blk) : bs->node_name;
3001 int bdrv_get_flags(BlockDriverState *bs)
3003 return bs->open_flags;
3006 int bdrv_has_zero_init_1(BlockDriverState *bs)
3008 return 1;
3011 int bdrv_has_zero_init(BlockDriverState *bs)
3013 assert(bs->drv);
3015 /* If BS is a copy on write image, it is initialized to
3016 the contents of the base image, which may not be zeroes. */
3017 if (bs->backing) {
3018 return 0;
3020 if (bs->drv->bdrv_has_zero_init) {
3021 return bs->drv->bdrv_has_zero_init(bs);
3024 /* safe default */
3025 return 0;
3028 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3030 BlockDriverInfo bdi;
3032 if (bs->backing) {
3033 return false;
3036 if (bdrv_get_info(bs, &bdi) == 0) {
3037 return bdi.unallocated_blocks_are_zero;
3040 return false;
3043 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3045 BlockDriverInfo bdi;
3047 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3048 return false;
3051 if (bdrv_get_info(bs, &bdi) == 0) {
3052 return bdi.can_write_zeroes_with_unmap;
3055 return false;
3058 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3060 if (bs->backing && bs->backing->bs->encrypted)
3061 return bs->backing_file;
3062 else if (bs->encrypted)
3063 return bs->filename;
3064 else
3065 return NULL;
3068 void bdrv_get_backing_filename(BlockDriverState *bs,
3069 char *filename, int filename_size)
3071 pstrcpy(filename, filename_size, bs->backing_file);
3074 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3076 BlockDriver *drv = bs->drv;
3077 if (!drv)
3078 return -ENOMEDIUM;
3079 if (!drv->bdrv_get_info)
3080 return -ENOTSUP;
3081 memset(bdi, 0, sizeof(*bdi));
3082 return drv->bdrv_get_info(bs, bdi);
3085 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3087 BlockDriver *drv = bs->drv;
3088 if (drv && drv->bdrv_get_specific_info) {
3089 return drv->bdrv_get_specific_info(bs);
3091 return NULL;
3094 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3096 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3097 return;
3100 bs->drv->bdrv_debug_event(bs, event);
3103 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3104 const char *tag)
3106 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3107 bs = bs->file ? bs->file->bs : NULL;
3110 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3111 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3114 return -ENOTSUP;
3117 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3119 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3120 bs = bs->file ? bs->file->bs : NULL;
3123 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3124 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3127 return -ENOTSUP;
3130 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3132 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3133 bs = bs->file ? bs->file->bs : NULL;
3136 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3137 return bs->drv->bdrv_debug_resume(bs, tag);
3140 return -ENOTSUP;
3143 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3145 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3146 bs = bs->file ? bs->file->bs : NULL;
3149 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3150 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3153 return false;
3156 int bdrv_is_snapshot(BlockDriverState *bs)
3158 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3161 /* backing_file can either be relative, or absolute, or a protocol. If it is
3162 * relative, it must be relative to the chain. So, passing in bs->filename
3163 * from a BDS as backing_file should not be done, as that may be relative to
3164 * the CWD rather than the chain. */
3165 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3166 const char *backing_file)
3168 char *filename_full = NULL;
3169 char *backing_file_full = NULL;
3170 char *filename_tmp = NULL;
3171 int is_protocol = 0;
3172 BlockDriverState *curr_bs = NULL;
3173 BlockDriverState *retval = NULL;
3175 if (!bs || !bs->drv || !backing_file) {
3176 return NULL;
3179 filename_full = g_malloc(PATH_MAX);
3180 backing_file_full = g_malloc(PATH_MAX);
3181 filename_tmp = g_malloc(PATH_MAX);
3183 is_protocol = path_has_protocol(backing_file);
3185 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3187 /* If either of the filename paths is actually a protocol, then
3188 * compare unmodified paths; otherwise make paths relative */
3189 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3190 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3191 retval = curr_bs->backing->bs;
3192 break;
3194 } else {
3195 /* If not an absolute filename path, make it relative to the current
3196 * image's filename path */
3197 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3198 backing_file);
3200 /* We are going to compare absolute pathnames */
3201 if (!realpath(filename_tmp, filename_full)) {
3202 continue;
3205 /* We need to make sure the backing filename we are comparing against
3206 * is relative to the current image filename (or absolute) */
3207 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3208 curr_bs->backing_file);
3210 if (!realpath(filename_tmp, backing_file_full)) {
3211 continue;
3214 if (strcmp(backing_file_full, filename_full) == 0) {
3215 retval = curr_bs->backing->bs;
3216 break;
3221 g_free(filename_full);
3222 g_free(backing_file_full);
3223 g_free(filename_tmp);
3224 return retval;
3227 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3229 if (!bs->drv) {
3230 return 0;
3233 if (!bs->backing) {
3234 return 0;
3237 return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3240 void bdrv_init(void)
3242 module_call_init(MODULE_INIT_BLOCK);
3245 void bdrv_init_with_whitelist(void)
3247 use_bdrv_whitelist = 1;
3248 bdrv_init();
3251 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3253 Error *local_err = NULL;
3254 int ret;
3256 if (!bs->drv) {
3257 return;
3260 if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3261 return;
3263 bs->open_flags &= ~BDRV_O_INACTIVE;
3265 if (bs->drv->bdrv_invalidate_cache) {
3266 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3267 } else if (bs->file) {
3268 bdrv_invalidate_cache(bs->file->bs, &local_err);
3270 if (local_err) {
3271 bs->open_flags |= BDRV_O_INACTIVE;
3272 error_propagate(errp, local_err);
3273 return;
3276 ret = refresh_total_sectors(bs, bs->total_sectors);
3277 if (ret < 0) {
3278 bs->open_flags |= BDRV_O_INACTIVE;
3279 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3280 return;
3284 void bdrv_invalidate_cache_all(Error **errp)
3286 BlockDriverState *bs;
3287 Error *local_err = NULL;
3289 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3290 AioContext *aio_context = bdrv_get_aio_context(bs);
3292 aio_context_acquire(aio_context);
3293 bdrv_invalidate_cache(bs, &local_err);
3294 aio_context_release(aio_context);
3295 if (local_err) {
3296 error_propagate(errp, local_err);
3297 return;
3302 static int bdrv_inactivate(BlockDriverState *bs)
3304 int ret;
3306 if (bs->drv->bdrv_inactivate) {
3307 ret = bs->drv->bdrv_inactivate(bs);
3308 if (ret < 0) {
3309 return ret;
3313 bs->open_flags |= BDRV_O_INACTIVE;
3314 return 0;
3317 int bdrv_inactivate_all(void)
3319 BlockDriverState *bs;
3320 int ret;
3322 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3323 AioContext *aio_context = bdrv_get_aio_context(bs);
3325 aio_context_acquire(aio_context);
3326 ret = bdrv_inactivate(bs);
3327 aio_context_release(aio_context);
3328 if (ret < 0) {
3329 return ret;
3333 return 0;
3336 /**************************************************************/
3337 /* removable device support */
3340 * Return TRUE if the media is present
3342 bool bdrv_is_inserted(BlockDriverState *bs)
3344 BlockDriver *drv = bs->drv;
3345 BdrvChild *child;
3347 if (!drv) {
3348 return false;
3350 if (drv->bdrv_is_inserted) {
3351 return drv->bdrv_is_inserted(bs);
3353 QLIST_FOREACH(child, &bs->children, next) {
3354 if (!bdrv_is_inserted(child->bs)) {
3355 return false;
3358 return true;
3362 * Return whether the media changed since the last call to this
3363 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3365 int bdrv_media_changed(BlockDriverState *bs)
3367 BlockDriver *drv = bs->drv;
3369 if (drv && drv->bdrv_media_changed) {
3370 return drv->bdrv_media_changed(bs);
3372 return -ENOTSUP;
3376 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3378 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3380 BlockDriver *drv = bs->drv;
3381 const char *device_name;
3383 if (drv && drv->bdrv_eject) {
3384 drv->bdrv_eject(bs, eject_flag);
3387 device_name = bdrv_get_device_name(bs);
3388 if (device_name[0] != '\0') {
3389 qapi_event_send_device_tray_moved(device_name,
3390 eject_flag, &error_abort);
3395 * Lock or unlock the media (if it is locked, the user won't be able
3396 * to eject it manually).
3398 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3400 BlockDriver *drv = bs->drv;
3402 trace_bdrv_lock_medium(bs, locked);
3404 if (drv && drv->bdrv_lock_medium) {
3405 drv->bdrv_lock_medium(bs, locked);
3409 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3411 BdrvDirtyBitmap *bm;
3413 assert(name);
3414 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3415 if (bm->name && !strcmp(name, bm->name)) {
3416 return bm;
3419 return NULL;
3422 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3424 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3425 g_free(bitmap->name);
3426 bitmap->name = NULL;
3429 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3430 uint32_t granularity,
3431 const char *name,
3432 Error **errp)
3434 int64_t bitmap_size;
3435 BdrvDirtyBitmap *bitmap;
3436 uint32_t sector_granularity;
3438 assert((granularity & (granularity - 1)) == 0);
3440 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3441 error_setg(errp, "Bitmap already exists: %s", name);
3442 return NULL;
3444 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3445 assert(sector_granularity);
3446 bitmap_size = bdrv_nb_sectors(bs);
3447 if (bitmap_size < 0) {
3448 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3449 errno = -bitmap_size;
3450 return NULL;
3452 bitmap = g_new0(BdrvDirtyBitmap, 1);
3453 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3454 bitmap->size = bitmap_size;
3455 bitmap->name = g_strdup(name);
3456 bitmap->disabled = false;
3457 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3458 return bitmap;
3461 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3463 return bitmap->successor;
3466 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3468 return !(bitmap->disabled || bitmap->successor);
3471 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3473 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3474 return DIRTY_BITMAP_STATUS_FROZEN;
3475 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3476 return DIRTY_BITMAP_STATUS_DISABLED;
3477 } else {
3478 return DIRTY_BITMAP_STATUS_ACTIVE;
3483 * Create a successor bitmap destined to replace this bitmap after an operation.
3484 * Requires that the bitmap is not frozen and has no successor.
3486 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3487 BdrvDirtyBitmap *bitmap, Error **errp)
3489 uint64_t granularity;
3490 BdrvDirtyBitmap *child;
3492 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3493 error_setg(errp, "Cannot create a successor for a bitmap that is "
3494 "currently frozen");
3495 return -1;
3497 assert(!bitmap->successor);
3499 /* Create an anonymous successor */
3500 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3501 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3502 if (!child) {
3503 return -1;
3506 /* Successor will be on or off based on our current state. */
3507 child->disabled = bitmap->disabled;
3509 /* Install the successor and freeze the parent */
3510 bitmap->successor = child;
3511 return 0;
3515 * For a bitmap with a successor, yield our name to the successor,
3516 * delete the old bitmap, and return a handle to the new bitmap.
3518 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3519 BdrvDirtyBitmap *bitmap,
3520 Error **errp)
3522 char *name;
3523 BdrvDirtyBitmap *successor = bitmap->successor;
3525 if (successor == NULL) {
3526 error_setg(errp, "Cannot relinquish control if "
3527 "there's no successor present");
3528 return NULL;
3531 name = bitmap->name;
3532 bitmap->name = NULL;
3533 successor->name = name;
3534 bitmap->successor = NULL;
3535 bdrv_release_dirty_bitmap(bs, bitmap);
3537 return successor;
3541 * In cases of failure where we can no longer safely delete the parent,
3542 * we may wish to re-join the parent and child/successor.
3543 * The merged parent will be un-frozen, but not explicitly re-enabled.
3545 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3546 BdrvDirtyBitmap *parent,
3547 Error **errp)
3549 BdrvDirtyBitmap *successor = parent->successor;
3551 if (!successor) {
3552 error_setg(errp, "Cannot reclaim a successor when none is present");
3553 return NULL;
3556 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3557 error_setg(errp, "Merging of parent and successor bitmap failed");
3558 return NULL;
3560 bdrv_release_dirty_bitmap(bs, successor);
3561 parent->successor = NULL;
3563 return parent;
3567 * Truncates _all_ bitmaps attached to a BDS.
3569 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3571 BdrvDirtyBitmap *bitmap;
3572 uint64_t size = bdrv_nb_sectors(bs);
3574 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3575 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3576 hbitmap_truncate(bitmap->bitmap, size);
3577 bitmap->size = size;
3581 static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
3582 BdrvDirtyBitmap *bitmap,
3583 bool only_named)
3585 BdrvDirtyBitmap *bm, *next;
3586 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3587 if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
3588 assert(!bdrv_dirty_bitmap_frozen(bm));
3589 QLIST_REMOVE(bm, list);
3590 hbitmap_free(bm->bitmap);
3591 g_free(bm->name);
3592 g_free(bm);
3594 if (bitmap) {
3595 return;
3601 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3603 bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
3607 * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
3608 * There must not be any frozen bitmaps attached.
3610 static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
3612 bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
3615 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3617 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3618 bitmap->disabled = true;
3621 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3623 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3624 bitmap->disabled = false;
3627 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3629 BdrvDirtyBitmap *bm;
3630 BlockDirtyInfoList *list = NULL;
3631 BlockDirtyInfoList **plist = &list;
3633 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3634 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3635 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3636 info->count = bdrv_get_dirty_count(bm);
3637 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3638 info->has_name = !!bm->name;
3639 info->name = g_strdup(bm->name);
3640 info->status = bdrv_dirty_bitmap_status(bm);
3641 entry->value = info;
3642 *plist = entry;
3643 plist = &entry->next;
3646 return list;
3649 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3651 if (bitmap) {
3652 return hbitmap_get(bitmap->bitmap, sector);
3653 } else {
3654 return 0;
3659 * Chooses a default granularity based on the existing cluster size,
3660 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3661 * is no cluster size information available.
3663 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3665 BlockDriverInfo bdi;
3666 uint32_t granularity;
3668 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3669 granularity = MAX(4096, bdi.cluster_size);
3670 granularity = MIN(65536, granularity);
3671 } else {
3672 granularity = 65536;
3675 return granularity;
3678 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3680 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3683 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3685 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3688 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3689 int64_t cur_sector, int nr_sectors)
3691 assert(bdrv_dirty_bitmap_enabled(bitmap));
3692 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3695 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3696 int64_t cur_sector, int nr_sectors)
3698 assert(bdrv_dirty_bitmap_enabled(bitmap));
3699 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3702 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
3704 assert(bdrv_dirty_bitmap_enabled(bitmap));
3705 if (!out) {
3706 hbitmap_reset_all(bitmap->bitmap);
3707 } else {
3708 HBitmap *backup = bitmap->bitmap;
3709 bitmap->bitmap = hbitmap_alloc(bitmap->size,
3710 hbitmap_granularity(backup));
3711 *out = backup;
3715 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
3717 HBitmap *tmp = bitmap->bitmap;
3718 assert(bdrv_dirty_bitmap_enabled(bitmap));
3719 bitmap->bitmap = in;
3720 hbitmap_free(tmp);
3723 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3724 int nr_sectors)
3726 BdrvDirtyBitmap *bitmap;
3727 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3728 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3729 continue;
3731 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3736 * Advance an HBitmapIter to an arbitrary offset.
3738 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3740 assert(hbi->hb);
3741 hbitmap_iter_init(hbi, hbi->hb, offset);
3744 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3746 return hbitmap_count(bitmap->bitmap);
3749 /* Get a reference to bs */
3750 void bdrv_ref(BlockDriverState *bs)
3752 bs->refcnt++;
3755 /* Release a previously grabbed reference to bs.
3756 * If after releasing, reference count is zero, the BlockDriverState is
3757 * deleted. */
3758 void bdrv_unref(BlockDriverState *bs)
3760 if (!bs) {
3761 return;
3763 assert(bs->refcnt > 0);
3764 if (--bs->refcnt == 0) {
3765 bdrv_delete(bs);
3769 struct BdrvOpBlocker {
3770 Error *reason;
3771 QLIST_ENTRY(BdrvOpBlocker) list;
3774 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3776 BdrvOpBlocker *blocker;
3777 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3778 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3779 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3780 if (errp) {
3781 *errp = error_copy(blocker->reason);
3782 error_prepend(errp, "Node '%s' is busy: ",
3783 bdrv_get_device_or_node_name(bs));
3785 return true;
3787 return false;
3790 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3792 BdrvOpBlocker *blocker;
3793 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3795 blocker = g_new0(BdrvOpBlocker, 1);
3796 blocker->reason = reason;
3797 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3800 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3802 BdrvOpBlocker *blocker, *next;
3803 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3804 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3805 if (blocker->reason == reason) {
3806 QLIST_REMOVE(blocker, list);
3807 g_free(blocker);
3812 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3814 int i;
3815 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3816 bdrv_op_block(bs, i, reason);
3820 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3822 int i;
3823 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3824 bdrv_op_unblock(bs, i, reason);
3828 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3830 int i;
3832 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3833 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3834 return false;
3837 return true;
3840 void bdrv_img_create(const char *filename, const char *fmt,
3841 const char *base_filename, const char *base_fmt,
3842 char *options, uint64_t img_size, int flags,
3843 Error **errp, bool quiet)
3845 QemuOptsList *create_opts = NULL;
3846 QemuOpts *opts = NULL;
3847 const char *backing_fmt, *backing_file;
3848 int64_t size;
3849 BlockDriver *drv, *proto_drv;
3850 Error *local_err = NULL;
3851 int ret = 0;
3853 /* Find driver and parse its options */
3854 drv = bdrv_find_format(fmt);
3855 if (!drv) {
3856 error_setg(errp, "Unknown file format '%s'", fmt);
3857 return;
3860 proto_drv = bdrv_find_protocol(filename, true, errp);
3861 if (!proto_drv) {
3862 return;
3865 if (!drv->create_opts) {
3866 error_setg(errp, "Format driver '%s' does not support image creation",
3867 drv->format_name);
3868 return;
3871 if (!proto_drv->create_opts) {
3872 error_setg(errp, "Protocol driver '%s' does not support image creation",
3873 proto_drv->format_name);
3874 return;
3877 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3878 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3880 /* Create parameter list with default values */
3881 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3882 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3884 /* Parse -o options */
3885 if (options) {
3886 qemu_opts_do_parse(opts, options, NULL, &local_err);
3887 if (local_err) {
3888 error_report_err(local_err);
3889 local_err = NULL;
3890 error_setg(errp, "Invalid options for file format '%s'", fmt);
3891 goto out;
3895 if (base_filename) {
3896 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3897 if (local_err) {
3898 error_setg(errp, "Backing file not supported for file format '%s'",
3899 fmt);
3900 goto out;
3904 if (base_fmt) {
3905 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3906 if (local_err) {
3907 error_setg(errp, "Backing file format not supported for file "
3908 "format '%s'", fmt);
3909 goto out;
3913 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3914 if (backing_file) {
3915 if (!strcmp(filename, backing_file)) {
3916 error_setg(errp, "Error: Trying to create an image with the "
3917 "same filename as the backing file");
3918 goto out;
3922 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3924 // The size for the image must always be specified, with one exception:
3925 // If we are using a backing file, we can obtain the size from there
3926 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3927 if (size == -1) {
3928 if (backing_file) {
3929 BlockDriverState *bs;
3930 char *full_backing = g_new0(char, PATH_MAX);
3931 int64_t size;
3932 int back_flags;
3933 QDict *backing_options = NULL;
3935 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3936 full_backing, PATH_MAX,
3937 &local_err);
3938 if (local_err) {
3939 g_free(full_backing);
3940 goto out;
3943 /* backing files always opened read-only */
3944 back_flags =
3945 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3947 if (backing_fmt) {
3948 backing_options = qdict_new();
3949 qdict_put(backing_options, "driver",
3950 qstring_from_str(backing_fmt));
3953 bs = NULL;
3954 ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3955 back_flags, &local_err);
3956 g_free(full_backing);
3957 if (ret < 0) {
3958 goto out;
3960 size = bdrv_getlength(bs);
3961 if (size < 0) {
3962 error_setg_errno(errp, -size, "Could not get size of '%s'",
3963 backing_file);
3964 bdrv_unref(bs);
3965 goto out;
3968 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3970 bdrv_unref(bs);
3971 } else {
3972 error_setg(errp, "Image creation needs a size parameter");
3973 goto out;
3977 if (!quiet) {
3978 printf("Formatting '%s', fmt=%s ", filename, fmt);
3979 qemu_opts_print(opts, " ");
3980 puts("");
3983 ret = bdrv_create(drv, filename, opts, &local_err);
3985 if (ret == -EFBIG) {
3986 /* This is generally a better message than whatever the driver would
3987 * deliver (especially because of the cluster_size_hint), since that
3988 * is most probably not much different from "image too large". */
3989 const char *cluster_size_hint = "";
3990 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
3991 cluster_size_hint = " (try using a larger cluster size)";
3993 error_setg(errp, "The image size is too large for file format '%s'"
3994 "%s", fmt, cluster_size_hint);
3995 error_free(local_err);
3996 local_err = NULL;
3999 out:
4000 qemu_opts_del(opts);
4001 qemu_opts_free(create_opts);
4002 if (local_err) {
4003 error_propagate(errp, local_err);
4007 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
4009 return bs->aio_context;
4012 void bdrv_detach_aio_context(BlockDriverState *bs)
4014 BdrvAioNotifier *baf;
4016 if (!bs->drv) {
4017 return;
4020 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
4021 baf->detach_aio_context(baf->opaque);
4024 if (bs->throttle_state) {
4025 throttle_timers_detach_aio_context(&bs->throttle_timers);
4027 if (bs->drv->bdrv_detach_aio_context) {
4028 bs->drv->bdrv_detach_aio_context(bs);
4030 if (bs->file) {
4031 bdrv_detach_aio_context(bs->file->bs);
4033 if (bs->backing) {
4034 bdrv_detach_aio_context(bs->backing->bs);
4037 bs->aio_context = NULL;
4040 void bdrv_attach_aio_context(BlockDriverState *bs,
4041 AioContext *new_context)
4043 BdrvAioNotifier *ban;
4045 if (!bs->drv) {
4046 return;
4049 bs->aio_context = new_context;
4051 if (bs->backing) {
4052 bdrv_attach_aio_context(bs->backing->bs, new_context);
4054 if (bs->file) {
4055 bdrv_attach_aio_context(bs->file->bs, new_context);
4057 if (bs->drv->bdrv_attach_aio_context) {
4058 bs->drv->bdrv_attach_aio_context(bs, new_context);
4060 if (bs->throttle_state) {
4061 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
4064 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
4065 ban->attached_aio_context(new_context, ban->opaque);
4069 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
4071 bdrv_drain(bs); /* ensure there are no in-flight requests */
4073 bdrv_detach_aio_context(bs);
4075 /* This function executes in the old AioContext so acquire the new one in
4076 * case it runs in a different thread.
4078 aio_context_acquire(new_context);
4079 bdrv_attach_aio_context(bs, new_context);
4080 aio_context_release(new_context);
4083 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
4084 void (*attached_aio_context)(AioContext *new_context, void *opaque),
4085 void (*detach_aio_context)(void *opaque), void *opaque)
4087 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
4088 *ban = (BdrvAioNotifier){
4089 .attached_aio_context = attached_aio_context,
4090 .detach_aio_context = detach_aio_context,
4091 .opaque = opaque
4094 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
4097 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
4098 void (*attached_aio_context)(AioContext *,
4099 void *),
4100 void (*detach_aio_context)(void *),
4101 void *opaque)
4103 BdrvAioNotifier *ban, *ban_next;
4105 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4106 if (ban->attached_aio_context == attached_aio_context &&
4107 ban->detach_aio_context == detach_aio_context &&
4108 ban->opaque == opaque)
4110 QLIST_REMOVE(ban, list);
4111 g_free(ban);
4113 return;
4117 abort();
4120 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
4121 BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
4123 if (!bs->drv->bdrv_amend_options) {
4124 return -ENOTSUP;
4126 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
4129 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4130 * of block filter and by bdrv_is_first_non_filter.
4131 * It is used to test if the given bs is the candidate or recurse more in the
4132 * node graph.
4134 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4135 BlockDriverState *candidate)
4137 /* return false if basic checks fails */
4138 if (!bs || !bs->drv) {
4139 return false;
4142 /* the code reached a non block filter driver -> check if the bs is
4143 * the same as the candidate. It's the recursion termination condition.
4145 if (!bs->drv->is_filter) {
4146 return bs == candidate;
4148 /* Down this path the driver is a block filter driver */
4150 /* If the block filter recursion method is defined use it to recurse down
4151 * the node graph.
4153 if (bs->drv->bdrv_recurse_is_first_non_filter) {
4154 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4157 /* the driver is a block filter but don't allow to recurse -> return false
4159 return false;
4162 /* This function checks if the candidate is the first non filter bs down it's
4163 * bs chain. Since we don't have pointers to parents it explore all bs chains
4164 * from the top. Some filters can choose not to pass down the recursion.
4166 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4168 BlockDriverState *bs;
4170 /* walk down the bs forest recursively */
4171 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4172 bool perm;
4174 /* try to recurse in this top level bs */
4175 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4177 /* candidate is the first non filter */
4178 if (perm) {
4179 return true;
4183 return false;
4186 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
4187 const char *node_name, Error **errp)
4189 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4190 AioContext *aio_context;
4192 if (!to_replace_bs) {
4193 error_setg(errp, "Node name '%s' not found", node_name);
4194 return NULL;
4197 aio_context = bdrv_get_aio_context(to_replace_bs);
4198 aio_context_acquire(aio_context);
4200 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4201 to_replace_bs = NULL;
4202 goto out;
4205 /* We don't want arbitrary node of the BDS chain to be replaced only the top
4206 * most non filter in order to prevent data corruption.
4207 * Another benefit is that this tests exclude backing files which are
4208 * blocked by the backing blockers.
4210 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4211 error_setg(errp, "Only top most non filter can be replaced");
4212 to_replace_bs = NULL;
4213 goto out;
4216 out:
4217 aio_context_release(aio_context);
4218 return to_replace_bs;
4221 static bool append_open_options(QDict *d, BlockDriverState *bs)
4223 const QDictEntry *entry;
4224 QemuOptDesc *desc;
4225 BdrvChild *child;
4226 bool found_any = false;
4227 const char *p;
4229 for (entry = qdict_first(bs->options); entry;
4230 entry = qdict_next(bs->options, entry))
4232 /* Exclude options for children */
4233 QLIST_FOREACH(child, &bs->children, next) {
4234 if (strstart(qdict_entry_key(entry), child->name, &p)
4235 && (!*p || *p == '.'))
4237 break;
4240 if (child) {
4241 continue;
4244 /* And exclude all non-driver-specific options */
4245 for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
4246 if (!strcmp(qdict_entry_key(entry), desc->name)) {
4247 break;
4250 if (desc->name) {
4251 continue;
4254 qobject_incref(qdict_entry_value(entry));
4255 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4256 found_any = true;
4259 return found_any;
4262 /* Updates the following BDS fields:
4263 * - exact_filename: A filename which may be used for opening a block device
4264 * which (mostly) equals the given BDS (even without any
4265 * other options; so reading and writing must return the same
4266 * results, but caching etc. may be different)
4267 * - full_open_options: Options which, when given when opening a block device
4268 * (without a filename), result in a BDS (mostly)
4269 * equalling the given one
4270 * - filename: If exact_filename is set, it is copied here. Otherwise,
4271 * full_open_options is converted to a JSON object, prefixed with
4272 * "json:" (for use through the JSON pseudo protocol) and put here.
4274 void bdrv_refresh_filename(BlockDriverState *bs)
4276 BlockDriver *drv = bs->drv;
4277 QDict *opts;
4279 if (!drv) {
4280 return;
4283 /* This BDS's file name will most probably depend on its file's name, so
4284 * refresh that first */
4285 if (bs->file) {
4286 bdrv_refresh_filename(bs->file->bs);
4289 if (drv->bdrv_refresh_filename) {
4290 /* Obsolete information is of no use here, so drop the old file name
4291 * information before refreshing it */
4292 bs->exact_filename[0] = '\0';
4293 if (bs->full_open_options) {
4294 QDECREF(bs->full_open_options);
4295 bs->full_open_options = NULL;
4298 opts = qdict_new();
4299 append_open_options(opts, bs);
4300 drv->bdrv_refresh_filename(bs, opts);
4301 QDECREF(opts);
4302 } else if (bs->file) {
4303 /* Try to reconstruct valid information from the underlying file */
4304 bool has_open_options;
4306 bs->exact_filename[0] = '\0';
4307 if (bs->full_open_options) {
4308 QDECREF(bs->full_open_options);
4309 bs->full_open_options = NULL;
4312 opts = qdict_new();
4313 has_open_options = append_open_options(opts, bs);
4315 /* If no specific options have been given for this BDS, the filename of
4316 * the underlying file should suffice for this one as well */
4317 if (bs->file->bs->exact_filename[0] && !has_open_options) {
4318 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4320 /* Reconstructing the full options QDict is simple for most format block
4321 * drivers, as long as the full options are known for the underlying
4322 * file BDS. The full options QDict of that file BDS should somehow
4323 * contain a representation of the filename, therefore the following
4324 * suffices without querying the (exact_)filename of this BDS. */
4325 if (bs->file->bs->full_open_options) {
4326 qdict_put_obj(opts, "driver",
4327 QOBJECT(qstring_from_str(drv->format_name)));
4328 QINCREF(bs->file->bs->full_open_options);
4329 qdict_put_obj(opts, "file",
4330 QOBJECT(bs->file->bs->full_open_options));
4332 bs->full_open_options = opts;
4333 } else {
4334 QDECREF(opts);
4336 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4337 /* There is no underlying file BDS (at least referenced by BDS.file),
4338 * so the full options QDict should be equal to the options given
4339 * specifically for this block device when it was opened (plus the
4340 * driver specification).
4341 * Because those options don't change, there is no need to update
4342 * full_open_options when it's already set. */
4344 opts = qdict_new();
4345 append_open_options(opts, bs);
4346 qdict_put_obj(opts, "driver",
4347 QOBJECT(qstring_from_str(drv->format_name)));
4349 if (bs->exact_filename[0]) {
4350 /* This may not work for all block protocol drivers (some may
4351 * require this filename to be parsed), but we have to find some
4352 * default solution here, so just include it. If some block driver
4353 * does not support pure options without any filename at all or
4354 * needs some special format of the options QDict, it needs to
4355 * implement the driver-specific bdrv_refresh_filename() function.
4357 qdict_put_obj(opts, "filename",
4358 QOBJECT(qstring_from_str(bs->exact_filename)));
4361 bs->full_open_options = opts;
4364 if (bs->exact_filename[0]) {
4365 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4366 } else if (bs->full_open_options) {
4367 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4368 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4369 qstring_get_str(json));
4370 QDECREF(json);