ide: fix device_reset to not ignore pending AIO
[qemu.git] / block.c
blobefc3c43f891111167bc527d4144535a947d2b9b4
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "qemu/osdep.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "block/block_int.h"
28 #include "block/blockjob.h"
29 #include "qemu/error-report.h"
30 #include "qemu/module.h"
31 #include "qapi/qmp/qerror.h"
32 #include "qapi/qmp/qbool.h"
33 #include "qapi/qmp/qjson.h"
34 #include "sysemu/block-backend.h"
35 #include "sysemu/sysemu.h"
36 #include "qemu/notify.h"
37 #include "qemu/coroutine.h"
38 #include "block/qapi.h"
39 #include "qmp-commands.h"
40 #include "qemu/timer.h"
41 #include "qapi-event.h"
42 #include "block/throttle-groups.h"
44 #ifdef CONFIG_BSD
45 #include <sys/ioctl.h>
46 #include <sys/queue.h>
47 #ifndef __DragonFly__
48 #include <sys/disk.h>
49 #endif
50 #endif
52 #ifdef _WIN32
53 #include <windows.h>
54 #endif
56 /**
57 * A BdrvDirtyBitmap can be in three possible states:
58 * (1) successor is NULL and disabled is false: full r/w mode
59 * (2) successor is NULL and disabled is true: read only mode ("disabled")
60 * (3) successor is set: frozen mode.
61 * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
62 * or enabled. A frozen bitmap can only abdicate() or reclaim().
64 struct BdrvDirtyBitmap {
65 HBitmap *bitmap; /* Dirty sector bitmap implementation */
66 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
67 char *name; /* Optional non-empty unique ID */
68 int64_t size; /* Size of the bitmap (Number of sectors) */
69 bool disabled; /* Bitmap is read-only */
70 QLIST_ENTRY(BdrvDirtyBitmap) list;
73 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
75 struct BdrvStates bdrv_states = QTAILQ_HEAD_INITIALIZER(bdrv_states);
77 static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
78 QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
80 static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
81 QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
83 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
84 QLIST_HEAD_INITIALIZER(bdrv_drivers);
86 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
87 const char *reference, QDict *options, int flags,
88 BlockDriverState *parent,
89 const BdrvChildRole *child_role, Error **errp);
91 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
92 static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
94 /* If non-zero, use only whitelisted block drivers */
95 static int use_bdrv_whitelist;
97 static void bdrv_close(BlockDriverState *bs);
99 #ifdef _WIN32
/*
 * Return 1 if @filename begins with an ASCII letter immediately followed
 * by ':' (for example "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    int is_letter = (letter >= 'a' && letter <= 'z') ||
                    (letter >= 'A' && letter <= 'Z');

    if (!is_letter) {
        /* Do not look at the second character of a non-matching name */
        return 0;
    }

    return filename[1] == ':';
}
/*
 * Return 1 if @filename is exactly a drive specification such as "d:",
 * or starts with the "\\.\" or "//./" prefix; return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    int bare_drive = is_windows_drive_prefix(filename) && filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
117 #endif
119 size_t bdrv_opt_mem_align(BlockDriverState *bs)
121 if (!bs || !bs->drv) {
122 /* page size or 4k (hdd sector size) should be on the safe side */
123 return MAX(4096, getpagesize());
126 return bs->bl.opt_mem_alignment;
129 size_t bdrv_min_mem_align(BlockDriverState *bs)
131 if (!bs || !bs->drv) {
132 /* page size or 4k (hdd sector size) should be on the safe side */
133 return MAX(4096, getpagesize());
136 return bs->bl.min_mem_alignment;
/*
 * Check whether @path starts with "<protocol>:", i.e. whether the first
 * ':' comes before any path separator.  On Windows, drive specifications
 * are never treated as a protocol.
 */
int path_has_protocol(const char *path)
{
#ifdef _WIN32
    static const char stop_chars[] = ":/\\";

    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#else
    static const char stop_chars[] = ":/";
#endif

    /* Index of the first ':' or separator, or of the terminating NUL */
    return path[strcspn(path, stop_chars)] == ':';
}
/*
 * Return non-zero if @path is absolute.  On Windows this covers drive
 * specifications (including names like "\\.\d:") and paths starting with
 * '/' or '\'; elsewhere only a leading '/' counts.
 */
int path_is_absolute(const char *path)
{
#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 1;
    }
    return (path[0] == '/' || path[0] == '\\');
#else
    return (path[0] == '/');
#endif
}
/*
 * Build the path of @filename in @dest (at most @dest_size bytes).
 *
 * An absolute @filename is copied unchanged.  Otherwise the result is the
 * directory/protocol prefix of @base_path followed by @filename, where the
 * prefix ends after the last path separator or after the first ':',
 * whichever comes later.  URLs are supported.
 *
 * Nothing is written when @dest_size is not positive.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_colon, *after_sep, *prefix_end;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* Position just past the first ':' (or start of string if none) */
    after_colon = strchr(base_path, ':');
    after_colon = after_colon ? after_colon + 1 : base_path;

    /* Position just past the last separator (or start of string if none) */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    prefix_end = (after_sep > after_colon) ? after_sep : after_colon;

    /* Copy the prefix, truncated so the terminating NUL always fits */
    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';

    pstrcat(dest, dest_size, filename);
}
214 void bdrv_get_full_backing_filename_from_filename(const char *backed,
215 const char *backing,
216 char *dest, size_t sz,
217 Error **errp)
219 if (backing[0] == '\0' || path_has_protocol(backing) ||
220 path_is_absolute(backing))
222 pstrcpy(dest, sz, backing);
223 } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
224 error_setg(errp, "Cannot use relative backing file names for '%s'",
225 backed);
226 } else {
227 path_combine(dest, sz, backed, backing);
231 void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
232 Error **errp)
234 char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
236 bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
237 dest, sz, errp);
240 void bdrv_register(BlockDriver *bdrv)
242 bdrv_setup_io_funcs(bdrv);
244 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
247 BlockDriverState *bdrv_new_root(void)
249 BlockDriverState *bs = bdrv_new();
251 QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
252 return bs;
255 BlockDriverState *bdrv_new(void)
257 BlockDriverState *bs;
258 int i;
260 bs = g_new0(BlockDriverState, 1);
261 QLIST_INIT(&bs->dirty_bitmaps);
262 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
263 QLIST_INIT(&bs->op_blockers[i]);
265 notifier_with_return_list_init(&bs->before_write_notifiers);
266 qemu_co_queue_init(&bs->throttled_reqs[0]);
267 qemu_co_queue_init(&bs->throttled_reqs[1]);
268 bs->refcnt = 1;
269 bs->aio_context = qemu_get_aio_context();
271 QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
273 return bs;
276 BlockDriver *bdrv_find_format(const char *format_name)
278 BlockDriver *drv1;
279 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
280 if (!strcmp(drv1->format_name, format_name)) {
281 return drv1;
284 return NULL;
287 static int bdrv_is_whitelisted(BlockDriver *drv, bool read_only)
289 static const char *whitelist_rw[] = {
290 CONFIG_BDRV_RW_WHITELIST
292 static const char *whitelist_ro[] = {
293 CONFIG_BDRV_RO_WHITELIST
295 const char **p;
297 if (!whitelist_rw[0] && !whitelist_ro[0]) {
298 return 1; /* no whitelist, anything goes */
301 for (p = whitelist_rw; *p; p++) {
302 if (!strcmp(drv->format_name, *p)) {
303 return 1;
306 if (read_only) {
307 for (p = whitelist_ro; *p; p++) {
308 if (!strcmp(drv->format_name, *p)) {
309 return 1;
313 return 0;
316 typedef struct CreateCo {
317 BlockDriver *drv;
318 char *filename;
319 QemuOpts *opts;
320 int ret;
321 Error *err;
322 } CreateCo;
324 static void coroutine_fn bdrv_create_co_entry(void *opaque)
326 Error *local_err = NULL;
327 int ret;
329 CreateCo *cco = opaque;
330 assert(cco->drv);
332 ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
333 if (local_err) {
334 error_propagate(&cco->err, local_err);
336 cco->ret = ret;
339 int bdrv_create(BlockDriver *drv, const char* filename,
340 QemuOpts *opts, Error **errp)
342 int ret;
344 Coroutine *co;
345 CreateCo cco = {
346 .drv = drv,
347 .filename = g_strdup(filename),
348 .opts = opts,
349 .ret = NOT_DONE,
350 .err = NULL,
353 if (!drv->bdrv_create) {
354 error_setg(errp, "Driver '%s' does not support image creation", drv->format_name);
355 ret = -ENOTSUP;
356 goto out;
359 if (qemu_in_coroutine()) {
360 /* Fast-path if already in coroutine context */
361 bdrv_create_co_entry(&cco);
362 } else {
363 co = qemu_coroutine_create(bdrv_create_co_entry);
364 qemu_coroutine_enter(co, &cco);
365 while (cco.ret == NOT_DONE) {
366 aio_poll(qemu_get_aio_context(), true);
370 ret = cco.ret;
371 if (ret < 0) {
372 if (cco.err) {
373 error_propagate(errp, cco.err);
374 } else {
375 error_setg_errno(errp, -ret, "Could not create image");
379 out:
380 g_free(cco.filename);
381 return ret;
384 int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp)
386 BlockDriver *drv;
387 Error *local_err = NULL;
388 int ret;
390 drv = bdrv_find_protocol(filename, true, errp);
391 if (drv == NULL) {
392 return -ENOENT;
395 ret = bdrv_create(drv, filename, opts, &local_err);
396 if (local_err) {
397 error_propagate(errp, local_err);
399 return ret;
403 * Try to get @bs's logical and physical block size.
404 * On success, store them in @bsz struct and return 0.
405 * On failure return -errno.
406 * @bs must not be empty.
408 int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
410 BlockDriver *drv = bs->drv;
412 if (drv && drv->bdrv_probe_blocksizes) {
413 return drv->bdrv_probe_blocksizes(bs, bsz);
416 return -ENOTSUP;
420 * Try to get @bs's geometry (cyls, heads, sectors).
421 * On success, store them in @geo struct and return 0.
422 * On failure return -errno.
423 * @bs must not be empty.
425 int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
427 BlockDriver *drv = bs->drv;
429 if (drv && drv->bdrv_probe_geometry) {
430 return drv->bdrv_probe_geometry(bs, geo);
433 return -ENOTSUP;
/*
 * Create a uniquely-named empty temporary file.
 *
 * @filename: buffer that receives the name of the created file
 * @size:     size of @filename in bytes
 *
 * On POSIX hosts the file is created in $TMPDIR, or in /var/tmp when
 * TMPDIR is unset.  Return 0 upon success, otherwise a negative errno
 * value; -EOVERFLOW means the name did not fit into @filename.
 *
 * On Windows @size must be at least MAX_PATH, and a failure returns the
 * negated GetLastError() code.
 */
int get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];
    /* GetTempFileName requires that its output buffer (4th param)
       have length MAX_PATH or greater.  */
    assert(size >= MAX_PATH);
    return (GetTempPath(MAX_PATH, temp_dir)
            && GetTempFileName(temp_dir, "qem", 0, filename)
            ? 0 : -GetLastError());
#else
    int fd;
    int saved_errno;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/var/tmp";
    }
    if (snprintf(filename, size, "%s/vl.XXXXXX", tmpdir) >= size) {
        return -EOVERFLOW;
    }
    fd = mkstemp(filename);
    if (fd < 0) {
        return -errno;
    }
    if (close(fd) != 0) {
        /* unlink() may overwrite errno, so capture close()'s error first */
        saved_errno = errno;
        unlink(filename);
        return -saved_errno;
    }
    return 0;
#endif
}
473 * Detect host devices. By convention, /dev/cdrom[N] is always
474 * recognized as a host CDROM.
476 static BlockDriver *find_hdev_driver(const char *filename)
478 int score_max = 0, score;
479 BlockDriver *drv = NULL, *d;
481 QLIST_FOREACH(d, &bdrv_drivers, list) {
482 if (d->bdrv_probe_device) {
483 score = d->bdrv_probe_device(filename);
484 if (score > score_max) {
485 score_max = score;
486 drv = d;
491 return drv;
494 BlockDriver *bdrv_find_protocol(const char *filename,
495 bool allow_protocol_prefix,
496 Error **errp)
498 BlockDriver *drv1;
499 char protocol[128];
500 int len;
501 const char *p;
503 /* TODO Drivers without bdrv_file_open must be specified explicitly */
506 * XXX(hch): we really should not let host device detection
507 * override an explicit protocol specification, but moving this
508 * later breaks access to device names with colons in them.
509 * Thanks to the brain-dead persistent naming schemes on udev-
510 * based Linux systems those actually are quite common.
512 drv1 = find_hdev_driver(filename);
513 if (drv1) {
514 return drv1;
517 if (!path_has_protocol(filename) || !allow_protocol_prefix) {
518 return &bdrv_file;
521 p = strchr(filename, ':');
522 assert(p != NULL);
523 len = p - filename;
524 if (len > sizeof(protocol) - 1)
525 len = sizeof(protocol) - 1;
526 memcpy(protocol, filename, len);
527 protocol[len] = '\0';
528 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
529 if (drv1->protocol_name &&
530 !strcmp(drv1->protocol_name, protocol)) {
531 return drv1;
535 error_setg(errp, "Unknown protocol '%s'", protocol);
536 return NULL;
540 * Guess image format by probing its contents.
541 * This is not a good idea when your image is raw (CVE-2008-2004), but
542 * we do it anyway for backward compatibility.
544 * @buf contains the image's first @buf_size bytes.
545 * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
546 * but can be smaller if the image file is smaller)
547 * @filename is its filename.
549 * For all block drivers, call the bdrv_probe() method to get its
550 * probing score.
551 * Return the first block driver with the highest probing score.
553 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
554 const char *filename)
556 int score_max = 0, score;
557 BlockDriver *drv = NULL, *d;
559 QLIST_FOREACH(d, &bdrv_drivers, list) {
560 if (d->bdrv_probe) {
561 score = d->bdrv_probe(buf, buf_size, filename);
562 if (score > score_max) {
563 score_max = score;
564 drv = d;
569 return drv;
572 static int find_image_format(BlockDriverState *bs, const char *filename,
573 BlockDriver **pdrv, Error **errp)
575 BlockDriver *drv;
576 uint8_t buf[BLOCK_PROBE_BUF_SIZE];
577 int ret = 0;
579 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
580 if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
581 *pdrv = &bdrv_raw;
582 return ret;
585 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
586 if (ret < 0) {
587 error_setg_errno(errp, -ret, "Could not read image for determining its "
588 "format");
589 *pdrv = NULL;
590 return ret;
593 drv = bdrv_probe_all(buf, ret, filename);
594 if (!drv) {
595 error_setg(errp, "Could not determine image format: No compatible "
596 "driver found");
597 ret = -ENOENT;
599 *pdrv = drv;
600 return ret;
604 * Set the current 'total_sectors' value
605 * Return 0 on success, -errno on error.
607 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
609 BlockDriver *drv = bs->drv;
611 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
612 if (bdrv_is_sg(bs))
613 return 0;
615 /* query actual device if possible, otherwise just trust the hint */
616 if (drv->bdrv_getlength) {
617 int64_t length = drv->bdrv_getlength(bs);
618 if (length < 0) {
619 return length;
621 hint = DIV_ROUND_UP(length, BDRV_SECTOR_SIZE);
624 bs->total_sectors = hint;
625 return 0;
629 * Combines a QDict of new block driver @options with any missing options taken
630 * from @old_options, so that leaving out an option defaults to its old value.
632 static void bdrv_join_options(BlockDriverState *bs, QDict *options,
633 QDict *old_options)
635 if (bs->drv && bs->drv->bdrv_join_options) {
636 bs->drv->bdrv_join_options(options, old_options);
637 } else {
638 qdict_join(options, old_options, false);
643 * Set open flags for a given discard mode
645 * Return 0 on success, -1 if the discard mode was invalid.
647 int bdrv_parse_discard_flags(const char *mode, int *flags)
649 *flags &= ~BDRV_O_UNMAP;
651 if (!strcmp(mode, "off") || !strcmp(mode, "ignore")) {
652 /* do nothing */
653 } else if (!strcmp(mode, "on") || !strcmp(mode, "unmap")) {
654 *flags |= BDRV_O_UNMAP;
655 } else {
656 return -1;
659 return 0;
663 * Set open flags for a given cache mode
665 * Return 0 on success, -1 if the cache mode was invalid.
667 int bdrv_parse_cache_flags(const char *mode, int *flags)
669 *flags &= ~BDRV_O_CACHE_MASK;
671 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
672 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
673 } else if (!strcmp(mode, "directsync")) {
674 *flags |= BDRV_O_NOCACHE;
675 } else if (!strcmp(mode, "writeback")) {
676 *flags |= BDRV_O_CACHE_WB;
677 } else if (!strcmp(mode, "unsafe")) {
678 *flags |= BDRV_O_CACHE_WB;
679 *flags |= BDRV_O_NO_FLUSH;
680 } else if (!strcmp(mode, "writethrough")) {
681 /* this is the default */
682 } else {
683 return -1;
686 return 0;
690 * Returns the flags that a temporary snapshot should get, based on the
691 * originally requested flags (the originally requested image will have flags
692 * like a backing file)
694 static int bdrv_temp_snapshot_flags(int flags)
696 return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
700 * Returns the options and flags that bs->file should get if a protocol driver
701 * is expected, based on the given options and flags for the parent BDS
703 static void bdrv_inherited_options(int *child_flags, QDict *child_options,
704 int parent_flags, QDict *parent_options)
706 int flags = parent_flags;
708 /* Enable protocol handling, disable format probing for bs->file */
709 flags |= BDRV_O_PROTOCOL;
711 /* If the cache mode isn't explicitly set, inherit direct and no-flush from
712 * the parent. */
713 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
714 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
716 /* Our block drivers take care to send flushes and respect unmap policy,
717 * so we can default to enable both on lower layers regardless of the
718 * corresponding parent options. */
719 qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
720 flags |= BDRV_O_UNMAP;
722 /* Clear flags that only apply to the top layer */
723 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
725 *child_flags = flags;
728 const BdrvChildRole child_file = {
729 .inherit_options = bdrv_inherited_options,
733 * Returns the options and flags that bs->file should get if the use of formats
734 * (and not only protocols) is permitted for it, based on the given options and
735 * flags for the parent BDS
737 static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
738 int parent_flags, QDict *parent_options)
740 child_file.inherit_options(child_flags, child_options,
741 parent_flags, parent_options);
743 *child_flags &= ~BDRV_O_PROTOCOL;
746 const BdrvChildRole child_format = {
747 .inherit_options = bdrv_inherited_fmt_options,
751 * Returns the options and flags that bs->backing should get, based on the
752 * given options and flags for the parent BDS
754 static void bdrv_backing_options(int *child_flags, QDict *child_options,
755 int parent_flags, QDict *parent_options)
757 int flags = parent_flags;
759 /* The cache mode is inherited unmodified for backing files */
760 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_WB);
761 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
762 qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
764 /* backing files always opened read-only */
765 flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
767 /* snapshot=on is handled on the top layer */
768 flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
770 *child_flags = flags;
773 static const BdrvChildRole child_backing = {
774 .inherit_options = bdrv_backing_options,
777 static int bdrv_open_flags(BlockDriverState *bs, int flags)
779 int open_flags = flags | BDRV_O_CACHE_WB;
782 * Clear flags that are internal to the block layer before opening the
783 * image.
785 open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL);
788 * Snapshots should be writable.
790 if (flags & BDRV_O_TEMPORARY) {
791 open_flags |= BDRV_O_RDWR;
794 return open_flags;
797 static void update_flags_from_options(int *flags, QemuOpts *opts)
799 *flags &= ~BDRV_O_CACHE_MASK;
801 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
802 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
803 *flags |= BDRV_O_CACHE_WB;
806 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
807 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
808 *flags |= BDRV_O_NO_FLUSH;
811 assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
812 if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
813 *flags |= BDRV_O_NOCACHE;
817 static void update_options_from_flags(QDict *options, int flags)
819 if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
820 qdict_put(options, BDRV_OPT_CACHE_WB,
821 qbool_from_bool(flags & BDRV_O_CACHE_WB));
823 if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
824 qdict_put(options, BDRV_OPT_CACHE_DIRECT,
825 qbool_from_bool(flags & BDRV_O_NOCACHE));
827 if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
828 qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
829 qbool_from_bool(flags & BDRV_O_NO_FLUSH));
833 static void bdrv_assign_node_name(BlockDriverState *bs,
834 const char *node_name,
835 Error **errp)
837 char *gen_node_name = NULL;
839 if (!node_name) {
840 node_name = gen_node_name = id_generate(ID_BLOCK);
841 } else if (!id_wellformed(node_name)) {
843 * Check for empty string or invalid characters, but not if it is
844 * generated (generated names use characters not available to the user)
846 error_setg(errp, "Invalid node name");
847 return;
850 /* takes care of avoiding namespaces collisions */
851 if (blk_by_name(node_name)) {
852 error_setg(errp, "node-name=%s is conflicting with a device id",
853 node_name);
854 goto out;
857 /* takes care of avoiding duplicates node names */
858 if (bdrv_find_node(node_name)) {
859 error_setg(errp, "Duplicate node name");
860 goto out;
863 /* copy node name into the bs and insert it into the graph list */
864 pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
865 QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
866 out:
867 g_free(gen_node_name);
870 static QemuOptsList bdrv_runtime_opts = {
871 .name = "bdrv_common",
872 .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
873 .desc = {
875 .name = "node-name",
876 .type = QEMU_OPT_STRING,
877 .help = "Node name of the block device node",
880 .name = "driver",
881 .type = QEMU_OPT_STRING,
882 .help = "Block driver to use for the node",
885 .name = BDRV_OPT_CACHE_WB,
886 .type = QEMU_OPT_BOOL,
887 .help = "Enable writeback mode",
890 .name = BDRV_OPT_CACHE_DIRECT,
891 .type = QEMU_OPT_BOOL,
892 .help = "Bypass software writeback cache on the host",
895 .name = BDRV_OPT_CACHE_NO_FLUSH,
896 .type = QEMU_OPT_BOOL,
897 .help = "Ignore flush requests",
899 { /* end of list */ }
904 * Common part for opening disk images and files
906 * Removes all processed options from *options.
908 static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
909 QDict *options, Error **errp)
911 int ret, open_flags;
912 const char *filename;
913 const char *driver_name = NULL;
914 const char *node_name = NULL;
915 QemuOpts *opts;
916 BlockDriver *drv;
917 Error *local_err = NULL;
919 assert(bs->file == NULL);
920 assert(options != NULL && bs->options != options);
922 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
923 qemu_opts_absorb_qdict(opts, options, &local_err);
924 if (local_err) {
925 error_propagate(errp, local_err);
926 ret = -EINVAL;
927 goto fail_opts;
930 driver_name = qemu_opt_get(opts, "driver");
931 drv = bdrv_find_format(driver_name);
932 assert(drv != NULL);
934 if (file != NULL) {
935 filename = file->bs->filename;
936 } else {
937 filename = qdict_get_try_str(options, "filename");
940 if (drv->bdrv_needs_filename && !filename) {
941 error_setg(errp, "The '%s' block driver requires a file name",
942 drv->format_name);
943 ret = -EINVAL;
944 goto fail_opts;
947 trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
948 drv->format_name);
950 node_name = qemu_opt_get(opts, "node-name");
951 bdrv_assign_node_name(bs, node_name, &local_err);
952 if (local_err) {
953 error_propagate(errp, local_err);
954 ret = -EINVAL;
955 goto fail_opts;
958 bs->request_alignment = 512;
959 bs->zero_beyond_eof = true;
960 bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
962 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
963 error_setg(errp,
964 !bs->read_only && bdrv_is_whitelisted(drv, true)
965 ? "Driver '%s' can only be used for read-only devices"
966 : "Driver '%s' is not whitelisted",
967 drv->format_name);
968 ret = -ENOTSUP;
969 goto fail_opts;
972 assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
973 if (bs->open_flags & BDRV_O_COPY_ON_READ) {
974 if (!bs->read_only) {
975 bdrv_enable_copy_on_read(bs);
976 } else {
977 error_setg(errp, "Can't use copy-on-read on read-only device");
978 ret = -EINVAL;
979 goto fail_opts;
983 if (filename != NULL) {
984 pstrcpy(bs->filename, sizeof(bs->filename), filename);
985 } else {
986 bs->filename[0] = '\0';
988 pstrcpy(bs->exact_filename, sizeof(bs->exact_filename), bs->filename);
990 bs->drv = drv;
991 bs->opaque = g_malloc0(drv->instance_size);
993 /* Apply cache mode options */
994 update_flags_from_options(&bs->open_flags, opts);
995 bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);
997 /* Open the image, either directly or using a protocol */
998 open_flags = bdrv_open_flags(bs, bs->open_flags);
999 if (drv->bdrv_file_open) {
1000 assert(file == NULL);
1001 assert(!drv->bdrv_needs_filename || filename != NULL);
1002 ret = drv->bdrv_file_open(bs, options, open_flags, &local_err);
1003 } else {
1004 if (file == NULL) {
1005 error_setg(errp, "Can't use '%s' as a block driver for the "
1006 "protocol level", drv->format_name);
1007 ret = -EINVAL;
1008 goto free_and_fail;
1010 bs->file = file;
1011 ret = drv->bdrv_open(bs, options, open_flags, &local_err);
1014 if (ret < 0) {
1015 if (local_err) {
1016 error_propagate(errp, local_err);
1017 } else if (bs->filename[0]) {
1018 error_setg_errno(errp, -ret, "Could not open '%s'", bs->filename);
1019 } else {
1020 error_setg_errno(errp, -ret, "Could not open image");
1022 goto free_and_fail;
1025 if (bs->encrypted) {
1026 error_report("Encrypted images are deprecated");
1027 error_printf("Support for them will be removed in a future release.\n"
1028 "You can use 'qemu-img convert' to convert your image"
1029 " to an unencrypted one.\n");
1032 ret = refresh_total_sectors(bs, bs->total_sectors);
1033 if (ret < 0) {
1034 error_setg_errno(errp, -ret, "Could not refresh total sector count");
1035 goto free_and_fail;
1038 bdrv_refresh_limits(bs, &local_err);
1039 if (local_err) {
1040 error_propagate(errp, local_err);
1041 ret = -EINVAL;
1042 goto free_and_fail;
1045 assert(bdrv_opt_mem_align(bs) != 0);
1046 assert(bdrv_min_mem_align(bs) != 0);
1047 assert((bs->request_alignment != 0) || bdrv_is_sg(bs));
1049 qemu_opts_del(opts);
1050 return 0;
1052 free_and_fail:
1053 bs->file = NULL;
1054 g_free(bs->opaque);
1055 bs->opaque = NULL;
1056 bs->drv = NULL;
1057 fail_opts:
1058 qemu_opts_del(opts);
1059 return ret;
1062 static QDict *parse_json_filename(const char *filename, Error **errp)
1064 QObject *options_obj;
1065 QDict *options;
1066 int ret;
1068 ret = strstart(filename, "json:", &filename);
1069 assert(ret);
1071 options_obj = qobject_from_json(filename);
1072 if (!options_obj) {
1073 error_setg(errp, "Could not parse the JSON options");
1074 return NULL;
1077 if (qobject_type(options_obj) != QTYPE_QDICT) {
1078 qobject_decref(options_obj);
1079 error_setg(errp, "Invalid JSON object given");
1080 return NULL;
1083 options = qobject_to_qdict(options_obj);
1084 qdict_flatten(options);
1086 return options;
1089 static void parse_json_protocol(QDict *options, const char **pfilename,
1090 Error **errp)
1092 QDict *json_options;
1093 Error *local_err = NULL;
1095 /* Parse json: pseudo-protocol */
1096 if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
1097 return;
1100 json_options = parse_json_filename(*pfilename, &local_err);
1101 if (local_err) {
1102 error_propagate(errp, local_err);
1103 return;
1106 /* Options given in the filename have lower priority than options
1107 * specified directly */
1108 qdict_join(options, json_options, false);
1109 QDECREF(json_options);
1110 *pfilename = NULL;
1114 * Fills in default options for opening images and converts the legacy
1115 * filename/flags pair to option QDict entries.
1116 * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
1117 * block driver has been specified explicitly.
1119 static int bdrv_fill_options(QDict **options, const char *filename,
1120 int *flags, Error **errp)
1122 const char *drvname;
1123 bool protocol = *flags & BDRV_O_PROTOCOL;
1124 bool parse_filename = false;
1125 BlockDriver *drv = NULL;
1126 Error *local_err = NULL;
1128 drvname = qdict_get_try_str(*options, "driver");
1129 if (drvname) {
1130 drv = bdrv_find_format(drvname);
1131 if (!drv) {
1132 error_setg(errp, "Unknown driver '%s'", drvname);
1133 return -ENOENT;
1135 /* If the user has explicitly specified the driver, this choice should
1136 * override the BDRV_O_PROTOCOL flag */
1137 protocol = drv->bdrv_file_open;
1140 if (protocol) {
1141 *flags |= BDRV_O_PROTOCOL;
1142 } else {
1143 *flags &= ~BDRV_O_PROTOCOL;
1146 /* Translate cache options from flags into options */
1147 update_options_from_flags(*options, *flags);
1149 /* Fetch the file name from the options QDict if necessary */
1150 if (protocol && filename) {
1151 if (!qdict_haskey(*options, "filename")) {
1152 qdict_put(*options, "filename", qstring_from_str(filename));
1153 parse_filename = true;
1154 } else {
1155 error_setg(errp, "Can't specify 'file' and 'filename' options at "
1156 "the same time");
1157 return -EINVAL;
1161 /* Find the right block driver */
1162 filename = qdict_get_try_str(*options, "filename");
1164 if (!drvname && protocol) {
1165 if (filename) {
1166 drv = bdrv_find_protocol(filename, parse_filename, errp);
1167 if (!drv) {
1168 return -EINVAL;
1171 drvname = drv->format_name;
1172 qdict_put(*options, "driver", qstring_from_str(drvname));
1173 } else {
1174 error_setg(errp, "Must specify either driver or file");
1175 return -EINVAL;
1179 assert(drv || !protocol);
1181 /* Driver-specific filename parsing */
1182 if (drv && drv->bdrv_parse_filename && parse_filename) {
1183 drv->bdrv_parse_filename(filename, *options, &local_err);
1184 if (local_err) {
1185 error_propagate(errp, local_err);
1186 return -EINVAL;
1189 if (!drv->bdrv_needs_filename) {
1190 qdict_del(*options, "filename");
1194 if (runstate_check(RUN_STATE_INMIGRATE)) {
1195 *flags |= BDRV_O_INACTIVE;
1198 return 0;
1201 static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
1202 BlockDriverState *child_bs,
1203 const char *child_name,
1204 const BdrvChildRole *child_role)
1206 BdrvChild *child = g_new(BdrvChild, 1);
1207 *child = (BdrvChild) {
1208 .bs = child_bs,
1209 .name = g_strdup(child_name),
1210 .role = child_role,
1213 QLIST_INSERT_HEAD(&parent_bs->children, child, next);
1214 QLIST_INSERT_HEAD(&child_bs->parents, child, next_parent);
1216 return child;
1219 static void bdrv_detach_child(BdrvChild *child)
1221 QLIST_REMOVE(child, next);
1222 QLIST_REMOVE(child, next_parent);
1223 g_free(child->name);
1224 g_free(child);
1227 void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
1229 BlockDriverState *child_bs;
1231 if (child == NULL) {
1232 return;
1235 if (child->bs->inherits_from == parent) {
1236 child->bs->inherits_from = NULL;
1239 child_bs = child->bs;
1240 bdrv_detach_child(child);
1241 bdrv_unref(child_bs);
1245 * Sets the backing file link of a BDS. A new reference is created; callers
1246 * which don't need their own reference any more must call bdrv_unref().
1248 void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
1250 if (backing_hd) {
1251 bdrv_ref(backing_hd);
1254 if (bs->backing) {
1255 assert(bs->backing_blocker);
1256 bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
1257 bdrv_unref_child(bs, bs->backing);
1258 } else if (backing_hd) {
1259 error_setg(&bs->backing_blocker,
1260 "node is used as backing hd of '%s'",
1261 bdrv_get_device_or_node_name(bs));
1264 if (!backing_hd) {
1265 error_free(bs->backing_blocker);
1266 bs->backing_blocker = NULL;
1267 bs->backing = NULL;
1268 goto out;
1270 bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
1271 bs->open_flags &= ~BDRV_O_NO_BACKING;
1272 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
1273 pstrcpy(bs->backing_format, sizeof(bs->backing_format),
1274 backing_hd->drv ? backing_hd->drv->format_name : "");
1276 bdrv_op_block_all(backing_hd, bs->backing_blocker);
1277 /* Otherwise we won't be able to commit due to check in bdrv_commit */
1278 bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
1279 bs->backing_blocker);
1280 out:
1281 bdrv_refresh_limits(bs, NULL);
1285 * Opens the backing file for a BlockDriverState if not yet open
1287 * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
1288 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1289 * itself, all options starting with "${bdref_key}." are considered part of the
1290 * BlockdevRef.
1292 * TODO Can this be unified with bdrv_open_image()?
1294 int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
1295 const char *bdref_key, Error **errp)
1297 char *backing_filename = g_malloc0(PATH_MAX);
1298 char *bdref_key_dot;
1299 const char *reference = NULL;
1300 int ret = 0;
1301 BlockDriverState *backing_hd;
1302 QDict *options;
1303 QDict *tmp_parent_options = NULL;
1304 Error *local_err = NULL;
1306 if (bs->backing != NULL) {
1307 goto free_exit;
1310 /* NULL means an empty set of options */
1311 if (parent_options == NULL) {
1312 tmp_parent_options = qdict_new();
1313 parent_options = tmp_parent_options;
1316 bs->open_flags &= ~BDRV_O_NO_BACKING;
1318 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1319 qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
1320 g_free(bdref_key_dot);
1322 reference = qdict_get_try_str(parent_options, bdref_key);
1323 if (reference || qdict_haskey(options, "file.filename")) {
1324 backing_filename[0] = '\0';
1325 } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
1326 QDECREF(options);
1327 goto free_exit;
1328 } else {
1329 bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
1330 &local_err);
1331 if (local_err) {
1332 ret = -EINVAL;
1333 error_propagate(errp, local_err);
1334 QDECREF(options);
1335 goto free_exit;
1339 if (!bs->drv || !bs->drv->supports_backing) {
1340 ret = -EINVAL;
1341 error_setg(errp, "Driver doesn't support backing files");
1342 QDECREF(options);
1343 goto free_exit;
1346 if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
1347 qdict_put(options, "driver", qstring_from_str(bs->backing_format));
1350 backing_hd = NULL;
1351 ret = bdrv_open_inherit(&backing_hd,
1352 *backing_filename ? backing_filename : NULL,
1353 reference, options, 0, bs, &child_backing,
1354 errp);
1355 if (ret < 0) {
1356 bs->open_flags |= BDRV_O_NO_BACKING;
1357 error_prepend(errp, "Could not open backing file: ");
1358 goto free_exit;
1361 /* Hook up the backing file link; drop our reference, bs owns the
1362 * backing_hd reference now */
1363 bdrv_set_backing_hd(bs, backing_hd);
1364 bdrv_unref(backing_hd);
1366 qdict_del(parent_options, bdref_key);
1368 free_exit:
1369 g_free(backing_filename);
1370 QDECREF(tmp_parent_options);
1371 return ret;
1375 * Opens a disk image whose options are given as BlockdevRef in another block
1376 * device's options.
1378 * If allow_none is true, no image will be opened if filename is false and no
1379 * BlockdevRef is given. NULL will be returned, but errp remains unset.
1381 * bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
1382 * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
1383 * itself, all options starting with "${bdref_key}." are considered part of the
1384 * BlockdevRef.
1386 * The BlockdevRef will be removed from the options QDict.
1388 BdrvChild *bdrv_open_child(const char *filename,
1389 QDict *options, const char *bdref_key,
1390 BlockDriverState* parent,
1391 const BdrvChildRole *child_role,
1392 bool allow_none, Error **errp)
1394 BdrvChild *c = NULL;
1395 BlockDriverState *bs;
1396 QDict *image_options;
1397 int ret;
1398 char *bdref_key_dot;
1399 const char *reference;
1401 assert(child_role != NULL);
1403 bdref_key_dot = g_strdup_printf("%s.", bdref_key);
1404 qdict_extract_subqdict(options, &image_options, bdref_key_dot);
1405 g_free(bdref_key_dot);
1407 reference = qdict_get_try_str(options, bdref_key);
1408 if (!filename && !reference && !qdict_size(image_options)) {
1409 if (!allow_none) {
1410 error_setg(errp, "A block device must be specified for \"%s\"",
1411 bdref_key);
1413 QDECREF(image_options);
1414 goto done;
1417 bs = NULL;
1418 ret = bdrv_open_inherit(&bs, filename, reference, image_options, 0,
1419 parent, child_role, errp);
1420 if (ret < 0) {
1421 goto done;
1424 c = bdrv_attach_child(parent, bs, bdref_key, child_role);
1426 done:
1427 qdict_del(options, bdref_key);
1428 return c;
1431 int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
1433 /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
1434 char *tmp_filename = g_malloc0(PATH_MAX + 1);
1435 int64_t total_size;
1436 QemuOpts *opts = NULL;
1437 QDict *snapshot_options;
1438 BlockDriverState *bs_snapshot;
1439 Error *local_err = NULL;
1440 int ret;
1442 /* if snapshot, we create a temporary backing file and open it
1443 instead of opening 'filename' directly */
1445 /* Get the required size from the image */
1446 total_size = bdrv_getlength(bs);
1447 if (total_size < 0) {
1448 ret = total_size;
1449 error_setg_errno(errp, -total_size, "Could not get image size");
1450 goto out;
1453 /* Create the temporary image */
1454 ret = get_tmp_filename(tmp_filename, PATH_MAX + 1);
1455 if (ret < 0) {
1456 error_setg_errno(errp, -ret, "Could not get temporary filename");
1457 goto out;
1460 opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
1461 &error_abort);
1462 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
1463 ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
1464 qemu_opts_del(opts);
1465 if (ret < 0) {
1466 error_prepend(errp, "Could not create temporary overlay '%s': ",
1467 tmp_filename);
1468 goto out;
1471 /* Prepare a new options QDict for the temporary file */
1472 snapshot_options = qdict_new();
1473 qdict_put(snapshot_options, "file.driver",
1474 qstring_from_str("file"));
1475 qdict_put(snapshot_options, "file.filename",
1476 qstring_from_str(tmp_filename));
1477 qdict_put(snapshot_options, "driver",
1478 qstring_from_str("qcow2"));
1480 bs_snapshot = bdrv_new();
1482 ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
1483 flags, &local_err);
1484 if (ret < 0) {
1485 error_propagate(errp, local_err);
1486 goto out;
1489 bdrv_append(bs_snapshot, bs);
1491 out:
1492 g_free(tmp_filename);
1493 return ret;
1497 * Opens a disk image (raw, qcow2, vmdk, ...)
1499 * options is a QDict of options to pass to the block drivers, or NULL for an
1500 * empty set of options. The reference to the QDict belongs to the block layer
1501 * after the call (even on failure), so if the caller intends to reuse the
1502 * dictionary, it needs to use QINCREF() before calling bdrv_open.
1504 * If *pbs is NULL, a new BDS will be created with a pointer to it stored there.
1505 * If it is not NULL, the referenced BDS will be reused.
1507 * The reference parameter may be used to specify an existing block device which
1508 * should be opened. If specified, neither options nor a filename may be given,
1509 * nor can an existing BDS be reused (that is, *pbs has to be NULL).
1511 static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
1512 const char *reference, QDict *options, int flags,
1513 BlockDriverState *parent,
1514 const BdrvChildRole *child_role, Error **errp)
1516 int ret;
1517 BdrvChild *file = NULL;
1518 BlockDriverState *bs;
1519 BlockDriver *drv = NULL;
1520 const char *drvname;
1521 const char *backing;
1522 Error *local_err = NULL;
1523 int snapshot_flags = 0;
1525 assert(pbs);
1526 assert(!child_role || !flags);
1527 assert(!child_role == !parent);
1529 if (reference) {
1530 bool options_non_empty = options ? qdict_size(options) : false;
1531 QDECREF(options);
1533 if (*pbs) {
1534 error_setg(errp, "Cannot reuse an existing BDS when referencing "
1535 "another block device");
1536 return -EINVAL;
1539 if (filename || options_non_empty) {
1540 error_setg(errp, "Cannot reference an existing block device with "
1541 "additional options or a new filename");
1542 return -EINVAL;
1545 bs = bdrv_lookup_bs(reference, reference, errp);
1546 if (!bs) {
1547 return -ENODEV;
1549 bdrv_ref(bs);
1550 *pbs = bs;
1551 return 0;
1554 if (*pbs) {
1555 bs = *pbs;
1556 } else {
1557 bs = bdrv_new();
1560 /* NULL means an empty set of options */
1561 if (options == NULL) {
1562 options = qdict_new();
1565 /* json: syntax counts as explicit options, as if in the QDict */
1566 parse_json_protocol(options, &filename, &local_err);
1567 if (local_err) {
1568 ret = -EINVAL;
1569 goto fail;
1572 bs->explicit_options = qdict_clone_shallow(options);
1574 if (child_role) {
1575 bs->inherits_from = parent;
1576 child_role->inherit_options(&flags, options,
1577 parent->open_flags, parent->options);
1580 ret = bdrv_fill_options(&options, filename, &flags, &local_err);
1581 if (local_err) {
1582 goto fail;
1585 bs->open_flags = flags;
1586 bs->options = options;
1587 options = qdict_clone_shallow(options);
1589 /* Find the right image format driver */
1590 drvname = qdict_get_try_str(options, "driver");
1591 if (drvname) {
1592 drv = bdrv_find_format(drvname);
1593 if (!drv) {
1594 error_setg(errp, "Unknown driver: '%s'", drvname);
1595 ret = -EINVAL;
1596 goto fail;
1600 assert(drvname || !(flags & BDRV_O_PROTOCOL));
1602 backing = qdict_get_try_str(options, "backing");
1603 if (backing && *backing == '\0') {
1604 flags |= BDRV_O_NO_BACKING;
1605 qdict_del(options, "backing");
1608 /* Open image file without format layer */
1609 if ((flags & BDRV_O_PROTOCOL) == 0) {
1610 if (flags & BDRV_O_RDWR) {
1611 flags |= BDRV_O_ALLOW_RDWR;
1613 if (flags & BDRV_O_SNAPSHOT) {
1614 snapshot_flags = bdrv_temp_snapshot_flags(flags);
1615 bdrv_backing_options(&flags, options, flags, options);
1618 bs->open_flags = flags;
1620 file = bdrv_open_child(filename, options, "file", bs,
1621 &child_file, true, &local_err);
1622 if (local_err) {
1623 ret = -EINVAL;
1624 goto fail;
1628 /* Image format probing */
1629 bs->probed = !drv;
1630 if (!drv && file) {
1631 ret = find_image_format(file->bs, filename, &drv, &local_err);
1632 if (ret < 0) {
1633 goto fail;
1636 * This option update would logically belong in bdrv_fill_options(),
1637 * but we first need to open bs->file for the probing to work, while
1638 * opening bs->file already requires the (mostly) final set of options
1639 * so that cache mode etc. can be inherited.
1641 * Adding the driver later is somewhat ugly, but it's not an option
1642 * that would ever be inherited, so it's correct. We just need to make
1643 * sure to update both bs->options (which has the full effective
1644 * options for bs) and options (which has file.* already removed).
1646 qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
1647 qdict_put(options, "driver", qstring_from_str(drv->format_name));
1648 } else if (!drv) {
1649 error_setg(errp, "Must specify either driver or file");
1650 ret = -EINVAL;
1651 goto fail;
1654 /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
1655 assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
1656 /* file must be NULL if a protocol BDS is about to be created
1657 * (the inverse results in an error message from bdrv_open_common()) */
1658 assert(!(flags & BDRV_O_PROTOCOL) || !file);
1660 /* Open the image */
1661 ret = bdrv_open_common(bs, file, options, &local_err);
1662 if (ret < 0) {
1663 goto fail;
1666 if (file && (bs->file != file)) {
1667 bdrv_unref_child(bs, file);
1668 file = NULL;
1671 /* If there is a backing file, use it */
1672 if ((flags & BDRV_O_NO_BACKING) == 0) {
1673 ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
1674 if (ret < 0) {
1675 goto close_and_fail;
1679 bdrv_refresh_filename(bs);
1681 /* Check if any unknown options were used */
1682 if (options && (qdict_size(options) != 0)) {
1683 const QDictEntry *entry = qdict_first(options);
1684 if (flags & BDRV_O_PROTOCOL) {
1685 error_setg(errp, "Block protocol '%s' doesn't support the option "
1686 "'%s'", drv->format_name, entry->key);
1687 } else {
1688 error_setg(errp, "Block format '%s' used by device '%s' doesn't "
1689 "support the option '%s'", drv->format_name,
1690 bdrv_get_device_name(bs), entry->key);
1693 ret = -EINVAL;
1694 goto close_and_fail;
1697 if (!bdrv_key_required(bs)) {
1698 if (bs->blk) {
1699 blk_dev_change_media_cb(bs->blk, true);
1701 } else if (!runstate_check(RUN_STATE_PRELAUNCH)
1702 && !runstate_check(RUN_STATE_INMIGRATE)
1703 && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
1704 error_setg(errp,
1705 "Guest must be stopped for opening of encrypted image");
1706 ret = -EBUSY;
1707 goto close_and_fail;
1710 QDECREF(options);
1711 *pbs = bs;
1713 /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
1714 * temporary snapshot afterwards. */
1715 if (snapshot_flags) {
1716 ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
1717 if (local_err) {
1718 goto close_and_fail;
1722 return 0;
1724 fail:
1725 if (file != NULL) {
1726 bdrv_unref_child(bs, file);
1728 QDECREF(bs->explicit_options);
1729 QDECREF(bs->options);
1730 QDECREF(options);
1731 bs->options = NULL;
1732 if (!*pbs) {
1733 /* If *pbs is NULL, a new BDS has been created in this function and
1734 needs to be freed now. Otherwise, it does not need to be closed,
1735 since it has not really been opened yet. */
1736 bdrv_unref(bs);
1738 if (local_err) {
1739 error_propagate(errp, local_err);
1741 return ret;
1743 close_and_fail:
1744 /* See fail path, but now the BDS has to be always closed */
1745 if (*pbs) {
1746 bdrv_close(bs);
1747 } else {
1748 bdrv_unref(bs);
1750 QDECREF(options);
1751 if (local_err) {
1752 error_propagate(errp, local_err);
1754 return ret;
1757 int bdrv_open(BlockDriverState **pbs, const char *filename,
1758 const char *reference, QDict *options, int flags, Error **errp)
1760 return bdrv_open_inherit(pbs, filename, reference, options, flags, NULL,
1761 NULL, errp);
1764 typedef struct BlockReopenQueueEntry {
1765 bool prepared;
1766 BDRVReopenState state;
1767 QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
1768 } BlockReopenQueueEntry;
1771 * Adds a BlockDriverState to a simple queue for an atomic, transactional
1772 * reopen of multiple devices.
1774 * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
1775 * already performed, or alternatively may be NULL a new BlockReopenQueue will
1776 * be created and initialized. This newly created BlockReopenQueue should be
1777 * passed back in for subsequent calls that are intended to be of the same
1778 * atomic 'set'.
1780 * bs is the BlockDriverState to add to the reopen queue.
1782 * options contains the changed options for the associated bs
1783 * (the BlockReopenQueue takes ownership)
1785 * flags contains the open flags for the associated bs
1787 * returns a pointer to bs_queue, which is either the newly allocated
1788 * bs_queue, or the existing bs_queue being used.
1791 static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
1792 BlockDriverState *bs,
1793 QDict *options,
1794 int flags,
1795 const BdrvChildRole *role,
1796 QDict *parent_options,
1797 int parent_flags)
1799 assert(bs != NULL);
1801 BlockReopenQueueEntry *bs_entry;
1802 BdrvChild *child;
1803 QDict *old_options, *explicit_options;
1805 if (bs_queue == NULL) {
1806 bs_queue = g_new0(BlockReopenQueue, 1);
1807 QSIMPLEQ_INIT(bs_queue);
1810 if (!options) {
1811 options = qdict_new();
1815 * Precedence of options:
1816 * 1. Explicitly passed in options (highest)
1817 * 2. Set in flags (only for top level)
1818 * 3. Retained from explicitly set options of bs
1819 * 4. Inherited from parent node
1820 * 5. Retained from effective options of bs
1823 if (!parent_options) {
1825 * Any setting represented by flags is always updated. If the
1826 * corresponding QDict option is set, it takes precedence. Otherwise
1827 * the flag is translated into a QDict option. The old setting of bs is
1828 * not considered.
1830 update_options_from_flags(options, flags);
1833 /* Old explicitly set values (don't overwrite by inherited value) */
1834 old_options = qdict_clone_shallow(bs->explicit_options);
1835 bdrv_join_options(bs, options, old_options);
1836 QDECREF(old_options);
1838 explicit_options = qdict_clone_shallow(options);
1840 /* Inherit from parent node */
1841 if (parent_options) {
1842 assert(!flags);
1843 role->inherit_options(&flags, options, parent_flags, parent_options);
1846 /* Old values are used for options that aren't set yet */
1847 old_options = qdict_clone_shallow(bs->options);
1848 bdrv_join_options(bs, options, old_options);
1849 QDECREF(old_options);
1851 /* bdrv_open() masks this flag out */
1852 flags &= ~BDRV_O_PROTOCOL;
1854 QLIST_FOREACH(child, &bs->children, next) {
1855 QDict *new_child_options;
1856 char *child_key_dot;
1858 /* reopen can only change the options of block devices that were
1859 * implicitly created and inherited options. For other (referenced)
1860 * block devices, a syntax like "backing.foo" results in an error. */
1861 if (child->bs->inherits_from != bs) {
1862 continue;
1865 child_key_dot = g_strdup_printf("%s.", child->name);
1866 qdict_extract_subqdict(options, &new_child_options, child_key_dot);
1867 g_free(child_key_dot);
1869 bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
1870 child->role, options, flags);
1873 bs_entry = g_new0(BlockReopenQueueEntry, 1);
1874 QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
1876 bs_entry->state.bs = bs;
1877 bs_entry->state.options = options;
1878 bs_entry->state.explicit_options = explicit_options;
1879 bs_entry->state.flags = flags;
1881 return bs_queue;
1884 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
1885 BlockDriverState *bs,
1886 QDict *options, int flags)
1888 return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
1889 NULL, NULL, 0);
1893 * Reopen multiple BlockDriverStates atomically & transactionally.
1895 * The queue passed in (bs_queue) must have been built up previous
1896 * via bdrv_reopen_queue().
1898 * Reopens all BDS specified in the queue, with the appropriate
1899 * flags. All devices are prepared for reopen, and failure of any
1900 * device will cause all device changes to be abandonded, and intermediate
1901 * data cleaned up.
1903 * If all devices prepare successfully, then the changes are committed
1904 * to all devices.
1907 int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
1909 int ret = -1;
1910 BlockReopenQueueEntry *bs_entry, *next;
1911 Error *local_err = NULL;
1913 assert(bs_queue != NULL);
1915 bdrv_drain_all();
1917 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1918 if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
1919 error_propagate(errp, local_err);
1920 goto cleanup;
1922 bs_entry->prepared = true;
1925 /* If we reach this point, we have success and just need to apply the
1926 * changes
1928 QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
1929 bdrv_reopen_commit(&bs_entry->state);
1932 ret = 0;
1934 cleanup:
1935 QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
1936 if (ret && bs_entry->prepared) {
1937 bdrv_reopen_abort(&bs_entry->state);
1938 } else if (ret) {
1939 QDECREF(bs_entry->state.explicit_options);
1941 QDECREF(bs_entry->state.options);
1942 g_free(bs_entry);
1944 g_free(bs_queue);
1945 return ret;
1949 /* Reopen a single BlockDriverState with the specified flags. */
1950 int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
1952 int ret = -1;
1953 Error *local_err = NULL;
1954 BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
1956 ret = bdrv_reopen_multiple(queue, &local_err);
1957 if (local_err != NULL) {
1958 error_propagate(errp, local_err);
1960 return ret;
1965 * Prepares a BlockDriverState for reopen. All changes are staged in the
1966 * 'opaque' field of the BDRVReopenState, which is used and allocated by
1967 * the block driver layer .bdrv_reopen_prepare()
1969 * bs is the BlockDriverState to reopen
1970 * flags are the new open flags
1971 * queue is the reopen queue
1973 * Returns 0 on success, non-zero on error. On error errp will be set
1974 * as well.
1976 * On failure, bdrv_reopen_abort() will be called to clean up any data.
1977 * It is the responsibility of the caller to then call the abort() or
1978 * commit() for any other BDS that have been left in a prepare() state
1981 int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
1982 Error **errp)
1984 int ret = -1;
1985 Error *local_err = NULL;
1986 BlockDriver *drv;
1987 QemuOpts *opts;
1988 const char *value;
1990 assert(reopen_state != NULL);
1991 assert(reopen_state->bs->drv != NULL);
1992 drv = reopen_state->bs->drv;
1994 /* Process generic block layer options */
1995 opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
1996 qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
1997 if (local_err) {
1998 error_propagate(errp, local_err);
1999 ret = -EINVAL;
2000 goto error;
2003 update_flags_from_options(&reopen_state->flags, opts);
2005 /* If a guest device is attached, it owns WCE */
2006 if (reopen_state->bs->blk && blk_get_attached_dev(reopen_state->bs->blk)) {
2007 bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
2008 bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
2009 if (old_wce != new_wce) {
2010 error_setg(errp, "Cannot change cache.writeback: Device attached");
2011 ret = -EINVAL;
2012 goto error;
2016 /* node-name and driver must be unchanged. Put them back into the QDict, so
2017 * that they are checked at the end of this function. */
2018 value = qemu_opt_get(opts, "node-name");
2019 if (value) {
2020 qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
2023 value = qemu_opt_get(opts, "driver");
2024 if (value) {
2025 qdict_put(reopen_state->options, "driver", qstring_from_str(value));
2028 /* if we are to stay read-only, do not allow permission change
2029 * to r/w */
2030 if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
2031 reopen_state->flags & BDRV_O_RDWR) {
2032 error_setg(errp, "Node '%s' is read only",
2033 bdrv_get_device_or_node_name(reopen_state->bs));
2034 goto error;
2038 ret = bdrv_flush(reopen_state->bs);
2039 if (ret) {
2040 error_setg_errno(errp, -ret, "Error flushing drive");
2041 goto error;
2044 if (drv->bdrv_reopen_prepare) {
2045 ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
2046 if (ret) {
2047 if (local_err != NULL) {
2048 error_propagate(errp, local_err);
2049 } else {
2050 error_setg(errp, "failed while preparing to reopen image '%s'",
2051 reopen_state->bs->filename);
2053 goto error;
2055 } else {
2056 /* It is currently mandatory to have a bdrv_reopen_prepare()
2057 * handler for each supported drv. */
2058 error_setg(errp, "Block format '%s' used by node '%s' "
2059 "does not support reopening files", drv->format_name,
2060 bdrv_get_device_or_node_name(reopen_state->bs));
2061 ret = -1;
2062 goto error;
2065 /* Options that are not handled are only okay if they are unchanged
2066 * compared to the old state. It is expected that some options are only
2067 * used for the initial open, but not reopen (e.g. filename) */
2068 if (qdict_size(reopen_state->options)) {
2069 const QDictEntry *entry = qdict_first(reopen_state->options);
2071 do {
2072 QString *new_obj = qobject_to_qstring(entry->value);
2073 const char *new = qstring_get_str(new_obj);
2074 const char *old = qdict_get_try_str(reopen_state->bs->options,
2075 entry->key);
2077 if (!old || strcmp(new, old)) {
2078 error_setg(errp, "Cannot change the option '%s'", entry->key);
2079 ret = -EINVAL;
2080 goto error;
2082 } while ((entry = qdict_next(reopen_state->options, entry)));
2085 ret = 0;
2087 error:
2088 qemu_opts_del(opts);
2089 return ret;
2093 * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
2094 * makes them final by swapping the staging BlockDriverState contents into
2095 * the active BlockDriverState contents.
2097 void bdrv_reopen_commit(BDRVReopenState *reopen_state)
2099 BlockDriver *drv;
2101 assert(reopen_state != NULL);
2102 drv = reopen_state->bs->drv;
2103 assert(drv != NULL);
2105 /* If there are any driver level actions to take */
2106 if (drv->bdrv_reopen_commit) {
2107 drv->bdrv_reopen_commit(reopen_state);
2110 /* set BDS specific flags now */
2111 QDECREF(reopen_state->bs->explicit_options);
2113 reopen_state->bs->explicit_options = reopen_state->explicit_options;
2114 reopen_state->bs->open_flags = reopen_state->flags;
2115 reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
2116 BDRV_O_CACHE_WB);
2117 reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
2119 bdrv_refresh_limits(reopen_state->bs, NULL);
2123 * Abort the reopen, and delete and free the staged changes in
2124 * reopen_state
2126 void bdrv_reopen_abort(BDRVReopenState *reopen_state)
2128 BlockDriver *drv;
2130 assert(reopen_state != NULL);
2131 drv = reopen_state->bs->drv;
2132 assert(drv != NULL);
2134 if (drv->bdrv_reopen_abort) {
2135 drv->bdrv_reopen_abort(reopen_state);
2138 QDECREF(reopen_state->explicit_options);
2142 static void bdrv_close(BlockDriverState *bs)
2144 BdrvAioNotifier *ban, *ban_next;
2146 assert(!bs->job);
2148 /* Disable I/O limits and drain all pending throttled requests */
2149 if (bs->throttle_state) {
2150 bdrv_io_limits_disable(bs);
2153 bdrv_drained_begin(bs); /* complete I/O */
2154 bdrv_flush(bs);
2155 bdrv_drain(bs); /* in case flush left pending I/O */
2157 bdrv_release_named_dirty_bitmaps(bs);
2158 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
2160 if (bs->blk) {
2161 blk_dev_change_media_cb(bs->blk, false);
2164 if (bs->drv) {
2165 BdrvChild *child, *next;
2167 bs->drv->bdrv_close(bs);
2168 bs->drv = NULL;
2170 bdrv_set_backing_hd(bs, NULL);
2172 if (bs->file != NULL) {
2173 bdrv_unref_child(bs, bs->file);
2174 bs->file = NULL;
2177 QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
2178 /* TODO Remove bdrv_unref() from drivers' close function and use
2179 * bdrv_unref_child() here */
2180 if (child->bs->inherits_from == bs) {
2181 child->bs->inherits_from = NULL;
2183 bdrv_detach_child(child);
2186 g_free(bs->opaque);
2187 bs->opaque = NULL;
2188 bs->copy_on_read = 0;
2189 bs->backing_file[0] = '\0';
2190 bs->backing_format[0] = '\0';
2191 bs->total_sectors = 0;
2192 bs->encrypted = 0;
2193 bs->valid_key = 0;
2194 bs->sg = 0;
2195 bs->zero_beyond_eof = false;
2196 QDECREF(bs->options);
2197 QDECREF(bs->explicit_options);
2198 bs->options = NULL;
2199 QDECREF(bs->full_open_options);
2200 bs->full_open_options = NULL;
2203 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
2204 g_free(ban);
2206 QLIST_INIT(&bs->aio_notifiers);
2207 bdrv_drained_end(bs);
2210 void bdrv_close_all(void)
2212 BlockDriverState *bs;
2213 AioContext *aio_context;
2215 /* Drop references from requests still in flight, such as canceled block
2216 * jobs whose AIO context has not been polled yet */
2217 bdrv_drain_all();
2219 blk_remove_all_bs();
2220 blockdev_close_all_bdrv_states();
2222 /* Cancel all block jobs */
2223 while (!QTAILQ_EMPTY(&all_bdrv_states)) {
2224 QTAILQ_FOREACH(bs, &all_bdrv_states, bs_list) {
2225 aio_context = bdrv_get_aio_context(bs);
2227 aio_context_acquire(aio_context);
2228 if (bs->job) {
2229 block_job_cancel_sync(bs->job);
2230 aio_context_release(aio_context);
2231 break;
2233 aio_context_release(aio_context);
2236 /* All the remaining BlockDriverStates are referenced directly or
2237 * indirectly from block jobs, so there needs to be at least one BDS
2238 * directly used by a block job */
2239 assert(bs);
2243 /* Note that bs->device_list.tqe_prev is initially null,
2244 * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
2245 * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
2246 * resetting it to null on remove. */
2247 void bdrv_device_remove(BlockDriverState *bs)
2249 QTAILQ_REMOVE(&bdrv_states, bs, device_list);
2250 bs->device_list.tqe_prev = NULL;
2253 /* make a BlockDriverState anonymous by removing from bdrv_state and
2254 * graph_bdrv_state list.
2255 Also, NULL terminate the device_name to prevent double remove */
2256 void bdrv_make_anon(BlockDriverState *bs)
2258 /* Take care to remove bs from bdrv_states only when it's actually
2259 * in it. */
2260 if (bs->device_list.tqe_prev) {
2261 bdrv_device_remove(bs);
2263 if (bs->node_name[0] != '\0') {
2264 QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
2266 bs->node_name[0] = '\0';
2269 /* Fields that need to stay with the top-level BDS */
2270 static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
2271 BlockDriverState *bs_src)
2273 /* move some fields that need to stay attached to the device */
2275 /* dev info */
2276 bs_dest->copy_on_read = bs_src->copy_on_read;
2278 bs_dest->enable_write_cache = bs_src->enable_write_cache;
2280 /* dirty bitmap */
2281 bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
2284 static void change_parent_backing_link(BlockDriverState *from,
2285 BlockDriverState *to)
2287 BdrvChild *c, *next;
2289 QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
2290 assert(c->role != &child_backing);
2291 c->bs = to;
2292 QLIST_REMOVE(c, next_parent);
2293 QLIST_INSERT_HEAD(&to->parents, c, next_parent);
2294 bdrv_ref(to);
2295 bdrv_unref(from);
2297 if (from->blk) {
2298 blk_set_bs(from->blk, to);
2299 if (!to->device_list.tqe_prev) {
2300 QTAILQ_INSERT_BEFORE(from, to, device_list);
2302 bdrv_device_remove(from);
2306 static void swap_feature_fields(BlockDriverState *bs_top,
2307 BlockDriverState *bs_new)
2309 BlockDriverState tmp;
2311 bdrv_move_feature_fields(&tmp, bs_top);
2312 bdrv_move_feature_fields(bs_top, bs_new);
2313 bdrv_move_feature_fields(bs_new, &tmp);
2315 assert(!bs_new->throttle_state);
2316 if (bs_top->throttle_state) {
2317 assert(bs_top->io_limits_enabled);
2318 bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
2319 bdrv_io_limits_disable(bs_top);
2324 * Add new bs contents at the top of an image chain while the chain is
2325 * live, while keeping required fields on the top layer.
2327 * This will modify the BlockDriverState fields, and swap contents
2328 * between bs_new and bs_top. Both bs_new and bs_top are modified.
2330 * bs_new must not be attached to a BlockBackend.
2332 * This function does not create any image files.
2334 * bdrv_append() takes ownership of a bs_new reference and unrefs it because
2335 * that's what the callers commonly need. bs_new will be referenced by the old
2336 * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
2337 * reference of its own, it must call bdrv_ref().
2339 void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
2341 assert(!bdrv_requests_pending(bs_top));
2342 assert(!bdrv_requests_pending(bs_new));
2344 bdrv_ref(bs_top);
2345 change_parent_backing_link(bs_top, bs_new);
2347 /* Some fields always stay on top of the backing file chain */
2348 swap_feature_fields(bs_top, bs_new);
2350 bdrv_set_backing_hd(bs_new, bs_top);
2351 bdrv_unref(bs_top);
2353 /* bs_new is now referenced by its new parents, we don't need the
2354 * additional reference any more. */
2355 bdrv_unref(bs_new);
2358 void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
2360 assert(!bdrv_requests_pending(old));
2361 assert(!bdrv_requests_pending(new));
2363 bdrv_ref(old);
2365 if (old->blk) {
2366 /* As long as these fields aren't in BlockBackend, but in the top-level
2367 * BlockDriverState, it's not possible for a BDS to have two BBs.
2369 * We really want to copy the fields from old to new, but we go for a
2370 * swap instead so that pointers aren't duplicated and cause trouble.
2371 * (Also, bdrv_swap() used to do the same.) */
2372 assert(!new->blk);
2373 swap_feature_fields(old, new);
2375 change_parent_backing_link(old, new);
2377 /* Change backing files if a previously independent node is added to the
2378 * chain. For active commit, we replace top by its own (indirect) backing
2379 * file and don't do anything here so we don't build a loop. */
2380 if (new->backing == NULL && !bdrv_chain_contains(backing_bs(old), new)) {
2381 bdrv_set_backing_hd(new, backing_bs(old));
2382 bdrv_set_backing_hd(old, NULL);
2385 bdrv_unref(old);
2388 static void bdrv_delete(BlockDriverState *bs)
2390 assert(!bs->job);
2391 assert(bdrv_op_blocker_is_empty(bs));
2392 assert(!bs->refcnt);
2394 bdrv_close(bs);
2396 /* remove from list, if necessary */
2397 bdrv_make_anon(bs);
2399 QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
2401 g_free(bs);
2405 * Run consistency checks on an image
2407 * Returns 0 if the check could be completed (it doesn't mean that the image is
2408 * free of errors) or -errno when an internal error occurred. The results of the
2409 * check are stored in res.
2411 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
2413 if (bs->drv == NULL) {
2414 return -ENOMEDIUM;
2416 if (bs->drv->bdrv_check == NULL) {
2417 return -ENOTSUP;
2420 memset(res, 0, sizeof(*res));
2421 return bs->drv->bdrv_check(bs, res, fix);
2424 #define COMMIT_BUF_SECTORS 2048
2426 /* commit COW file into the raw image */
2427 int bdrv_commit(BlockDriverState *bs)
2429 BlockDriver *drv = bs->drv;
2430 int64_t sector, total_sectors, length, backing_length;
2431 int n, ro, open_flags;
2432 int ret = 0;
2433 uint8_t *buf = NULL;
2435 if (!drv)
2436 return -ENOMEDIUM;
2438 if (!bs->backing) {
2439 return -ENOTSUP;
2442 if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
2443 bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
2444 return -EBUSY;
2447 ro = bs->backing->bs->read_only;
2448 open_flags = bs->backing->bs->open_flags;
2450 if (ro) {
2451 if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
2452 return -EACCES;
2456 length = bdrv_getlength(bs);
2457 if (length < 0) {
2458 ret = length;
2459 goto ro_cleanup;
2462 backing_length = bdrv_getlength(bs->backing->bs);
2463 if (backing_length < 0) {
2464 ret = backing_length;
2465 goto ro_cleanup;
2468 /* If our top snapshot is larger than the backing file image,
2469 * grow the backing file image if possible. If not possible,
2470 * we must return an error */
2471 if (length > backing_length) {
2472 ret = bdrv_truncate(bs->backing->bs, length);
2473 if (ret < 0) {
2474 goto ro_cleanup;
2478 total_sectors = length >> BDRV_SECTOR_BITS;
2480 /* qemu_try_blockalign() for bs will choose an alignment that works for
2481 * bs->backing->bs as well, so no need to compare the alignment manually. */
2482 buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
2483 if (buf == NULL) {
2484 ret = -ENOMEM;
2485 goto ro_cleanup;
2488 for (sector = 0; sector < total_sectors; sector += n) {
2489 ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
2490 if (ret < 0) {
2491 goto ro_cleanup;
2493 if (ret) {
2494 ret = bdrv_read(bs, sector, buf, n);
2495 if (ret < 0) {
2496 goto ro_cleanup;
2499 ret = bdrv_write(bs->backing->bs, sector, buf, n);
2500 if (ret < 0) {
2501 goto ro_cleanup;
2506 if (drv->bdrv_make_empty) {
2507 ret = drv->bdrv_make_empty(bs);
2508 if (ret < 0) {
2509 goto ro_cleanup;
2511 bdrv_flush(bs);
2515 * Make sure all data we wrote to the backing device is actually
2516 * stable on disk.
2518 if (bs->backing) {
2519 bdrv_flush(bs->backing->bs);
2522 ret = 0;
2523 ro_cleanup:
2524 qemu_vfree(buf);
2526 if (ro) {
2527 /* ignoring error return here */
2528 bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
2531 return ret;
2534 int bdrv_commit_all(void)
2536 BlockDriverState *bs;
2538 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
2539 AioContext *aio_context = bdrv_get_aio_context(bs);
2541 aio_context_acquire(aio_context);
2542 if (bs->drv && bs->backing) {
2543 int ret = bdrv_commit(bs);
2544 if (ret < 0) {
2545 aio_context_release(aio_context);
2546 return ret;
2549 aio_context_release(aio_context);
2551 return 0;
2555 * Return values:
2556 * 0 - success
2557 * -EINVAL - backing format specified, but no file
2558 * -ENOSPC - can't update the backing file because no space is left in the
2559 * image file header
2560 * -ENOTSUP - format driver doesn't support changing the backing file
2562 int bdrv_change_backing_file(BlockDriverState *bs,
2563 const char *backing_file, const char *backing_fmt)
2565 BlockDriver *drv = bs->drv;
2566 int ret;
2568 /* Backing file format doesn't make sense without a backing file */
2569 if (backing_fmt && !backing_file) {
2570 return -EINVAL;
2573 if (drv->bdrv_change_backing_file != NULL) {
2574 ret = drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
2575 } else {
2576 ret = -ENOTSUP;
2579 if (ret == 0) {
2580 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
2581 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
2583 return ret;
2587 * Finds the image layer in the chain that has 'bs' as its backing file.
2589 * active is the current topmost image.
2591 * Returns NULL if bs is not found in active's image chain,
2592 * or if active == bs.
2594 * Returns the bottommost base image if bs == NULL.
2596 BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
2597 BlockDriverState *bs)
2599 while (active && bs != backing_bs(active)) {
2600 active = backing_bs(active);
2603 return active;
2606 /* Given a BDS, searches for the base layer. */
2607 BlockDriverState *bdrv_find_base(BlockDriverState *bs)
2609 return bdrv_find_overlay(bs, NULL);
2613 * Drops images above 'base' up to and including 'top', and sets the image
2614 * above 'top' to have base as its backing file.
2616 * Requires that the overlay to 'top' is opened r/w, so that the backing file
2617 * information in 'bs' can be properly updated.
2619 * E.g., this will convert the following chain:
2620 * bottom <- base <- intermediate <- top <- active
2622 * to
2624 * bottom <- base <- active
2626 * It is allowed for bottom==base, in which case it converts:
2628 * base <- intermediate <- top <- active
2630 * to
2632 * base <- active
2634 * If backing_file_str is non-NULL, it will be used when modifying top's
2635 * overlay image metadata.
2637 * Error conditions:
2638 * if active == top, that is considered an error
2641 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
2642 BlockDriverState *base, const char *backing_file_str)
2644 BlockDriverState *new_top_bs = NULL;
2645 int ret = -EIO;
2647 if (!top->drv || !base->drv) {
2648 goto exit;
2651 new_top_bs = bdrv_find_overlay(active, top);
2653 if (new_top_bs == NULL) {
2654 /* we could not find the image above 'top', this is an error */
2655 goto exit;
2658 /* special case of new_top_bs->backing->bs already pointing to base - nothing
2659 * to do, no intermediate images */
2660 if (backing_bs(new_top_bs) == base) {
2661 ret = 0;
2662 goto exit;
2665 /* Make sure that base is in the backing chain of top */
2666 if (!bdrv_chain_contains(top, base)) {
2667 goto exit;
2670 /* success - we can delete the intermediate states, and link top->base */
2671 backing_file_str = backing_file_str ? backing_file_str : base->filename;
2672 ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
2673 base->drv ? base->drv->format_name : "");
2674 if (ret) {
2675 goto exit;
2677 bdrv_set_backing_hd(new_top_bs, base);
2679 ret = 0;
2680 exit:
2681 return ret;
2685 * Truncate file to 'offset' bytes (needed only for file protocols)
2687 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
2689 BlockDriver *drv = bs->drv;
2690 int ret;
2691 if (!drv)
2692 return -ENOMEDIUM;
2693 if (!drv->bdrv_truncate)
2694 return -ENOTSUP;
2695 if (bs->read_only)
2696 return -EACCES;
2698 ret = drv->bdrv_truncate(bs, offset);
2699 if (ret == 0) {
2700 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
2701 bdrv_dirty_bitmap_truncate(bs);
2702 if (bs->blk) {
2703 blk_dev_resize_cb(bs->blk);
2706 return ret;
2710 * Length of a allocated file in bytes. Sparse files are counted by actual
2711 * allocated space. Return < 0 if error or unknown.
2713 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
2715 BlockDriver *drv = bs->drv;
2716 if (!drv) {
2717 return -ENOMEDIUM;
2719 if (drv->bdrv_get_allocated_file_size) {
2720 return drv->bdrv_get_allocated_file_size(bs);
2722 if (bs->file) {
2723 return bdrv_get_allocated_file_size(bs->file->bs);
2725 return -ENOTSUP;
2729 * Return number of sectors on success, -errno on error.
2731 int64_t bdrv_nb_sectors(BlockDriverState *bs)
2733 BlockDriver *drv = bs->drv;
2735 if (!drv)
2736 return -ENOMEDIUM;
2738 if (drv->has_variable_length) {
2739 int ret = refresh_total_sectors(bs, bs->total_sectors);
2740 if (ret < 0) {
2741 return ret;
2744 return bs->total_sectors;
2748 * Return length in bytes on success, -errno on error.
2749 * The length is always a multiple of BDRV_SECTOR_SIZE.
2751 int64_t bdrv_getlength(BlockDriverState *bs)
2753 int64_t ret = bdrv_nb_sectors(bs);
2755 ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
2756 return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
2759 /* return 0 as number of sectors if no device present or error */
2760 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
2762 int64_t nb_sectors = bdrv_nb_sectors(bs);
2764 *nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
2767 int bdrv_is_read_only(BlockDriverState *bs)
2769 return bs->read_only;
2772 int bdrv_is_sg(BlockDriverState *bs)
2774 return bs->sg;
2777 int bdrv_enable_write_cache(BlockDriverState *bs)
2779 return bs->enable_write_cache;
2782 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
2784 bs->enable_write_cache = wce;
2786 /* so a reopen() will preserve wce */
2787 if (wce) {
2788 bs->open_flags |= BDRV_O_CACHE_WB;
2789 } else {
2790 bs->open_flags &= ~BDRV_O_CACHE_WB;
2794 int bdrv_is_encrypted(BlockDriverState *bs)
2796 if (bs->backing && bs->backing->bs->encrypted) {
2797 return 1;
2799 return bs->encrypted;
2802 int bdrv_key_required(BlockDriverState *bs)
2804 BdrvChild *backing = bs->backing;
2806 if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
2807 return 1;
2809 return (bs->encrypted && !bs->valid_key);
2812 int bdrv_set_key(BlockDriverState *bs, const char *key)
2814 int ret;
2815 if (bs->backing && bs->backing->bs->encrypted) {
2816 ret = bdrv_set_key(bs->backing->bs, key);
2817 if (ret < 0)
2818 return ret;
2819 if (!bs->encrypted)
2820 return 0;
2822 if (!bs->encrypted) {
2823 return -EINVAL;
2824 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
2825 return -ENOMEDIUM;
2827 ret = bs->drv->bdrv_set_key(bs, key);
2828 if (ret < 0) {
2829 bs->valid_key = 0;
2830 } else if (!bs->valid_key) {
2831 bs->valid_key = 1;
2832 if (bs->blk) {
2833 /* call the change callback now, we skipped it on open */
2834 blk_dev_change_media_cb(bs->blk, true);
2837 return ret;
2841 * Provide an encryption key for @bs.
2842 * If @key is non-null:
2843 * If @bs is not encrypted, fail.
2844 * Else if the key is invalid, fail.
2845 * Else set @bs's key to @key, replacing the existing key, if any.
2846 * If @key is null:
2847 * If @bs is encrypted and still lacks a key, fail.
2848 * Else do nothing.
2849 * On failure, store an error object through @errp if non-null.
2851 void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
2853 if (key) {
2854 if (!bdrv_is_encrypted(bs)) {
2855 error_setg(errp, "Node '%s' is not encrypted",
2856 bdrv_get_device_or_node_name(bs));
2857 } else if (bdrv_set_key(bs, key) < 0) {
2858 error_setg(errp, QERR_INVALID_PASSWORD);
2860 } else {
2861 if (bdrv_key_required(bs)) {
2862 error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
2863 "'%s' (%s) is encrypted",
2864 bdrv_get_device_or_node_name(bs),
2865 bdrv_get_encrypted_filename(bs));
2870 const char *bdrv_get_format_name(BlockDriverState *bs)
2872 return bs->drv ? bs->drv->format_name : NULL;
/*
 * qsort() comparator for an array of C strings.
 *
 * qsort() passes the comparator pointers to the array elements, i.e.
 * pointers to 'const char *'.  Each argument therefore has to be
 * dereferenced once before it can be given to strcmp(); comparing the raw
 * arguments would compare the bytes of the pointers, not the strings.
 */
static int qsort_strcmp(const void *a, const void *b)
{
    const char *const *lhs = a;
    const char *const *rhs = b;

    return strcmp(*lhs, *rhs);
}
2880 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
2881 void *opaque)
2883 BlockDriver *drv;
2884 int count = 0;
2885 int i;
2886 const char **formats = NULL;
2888 QLIST_FOREACH(drv, &bdrv_drivers, list) {
2889 if (drv->format_name) {
2890 bool found = false;
2891 int i = count;
2892 while (formats && i && !found) {
2893 found = !strcmp(formats[--i], drv->format_name);
2896 if (!found) {
2897 formats = g_renew(const char *, formats, count + 1);
2898 formats[count++] = drv->format_name;
2903 qsort(formats, count, sizeof(formats[0]), qsort_strcmp);
2905 for (i = 0; i < count; i++) {
2906 it(opaque, formats[i]);
2909 g_free(formats);
2912 /* This function is to find a node in the bs graph */
2913 BlockDriverState *bdrv_find_node(const char *node_name)
2915 BlockDriverState *bs;
2917 assert(node_name);
2919 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2920 if (!strcmp(node_name, bs->node_name)) {
2921 return bs;
2924 return NULL;
2927 /* Put this QMP function here so it can access the static graph_bdrv_states. */
2928 BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
2930 BlockDeviceInfoList *list, *entry;
2931 BlockDriverState *bs;
2933 list = NULL;
2934 QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
2935 BlockDeviceInfo *info = bdrv_block_device_info(bs, errp);
2936 if (!info) {
2937 qapi_free_BlockDeviceInfoList(list);
2938 return NULL;
2940 entry = g_malloc0(sizeof(*entry));
2941 entry->value = info;
2942 entry->next = list;
2943 list = entry;
2946 return list;
2949 BlockDriverState *bdrv_lookup_bs(const char *device,
2950 const char *node_name,
2951 Error **errp)
2953 BlockBackend *blk;
2954 BlockDriverState *bs;
2956 if (device) {
2957 blk = blk_by_name(device);
2959 if (blk) {
2960 bs = blk_bs(blk);
2961 if (!bs) {
2962 error_setg(errp, "Device '%s' has no medium", device);
2965 return bs;
2969 if (node_name) {
2970 bs = bdrv_find_node(node_name);
2972 if (bs) {
2973 return bs;
2977 error_setg(errp, "Cannot find device=%s nor node_name=%s",
2978 device ? device : "",
2979 node_name ? node_name : "");
2980 return NULL;
2983 /* If 'base' is in the same chain as 'top', return true. Otherwise,
2984 * return false. If either argument is NULL, return false. */
2985 bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
2987 while (top && top != base) {
2988 top = backing_bs(top);
2991 return top != NULL;
2994 BlockDriverState *bdrv_next_node(BlockDriverState *bs)
2996 if (!bs) {
2997 return QTAILQ_FIRST(&graph_bdrv_states);
2999 return QTAILQ_NEXT(bs, node_list);
3002 BlockDriverState *bdrv_next(BlockDriverState *bs)
3004 if (!bs) {
3005 return QTAILQ_FIRST(&bdrv_states);
3007 return QTAILQ_NEXT(bs, device_list);
3010 const char *bdrv_get_node_name(const BlockDriverState *bs)
3012 return bs->node_name;
3015 /* TODO check what callers really want: bs->node_name or blk_name() */
3016 const char *bdrv_get_device_name(const BlockDriverState *bs)
3018 return bs->blk ? blk_name(bs->blk) : "";
3021 /* This can be used to identify nodes that might not have a device
3022 * name associated. Since node and device names live in the same
3023 * namespace, the result is unambiguous. The exception is if both are
3024 * absent, then this returns an empty (non-null) string. */
3025 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
3027 return bs->blk ? blk_name(bs->blk) : bs->node_name;
3030 int bdrv_get_flags(BlockDriverState *bs)
3032 return bs->open_flags;
3035 int bdrv_has_zero_init_1(BlockDriverState *bs)
3037 return 1;
3040 int bdrv_has_zero_init(BlockDriverState *bs)
3042 assert(bs->drv);
3044 /* If BS is a copy on write image, it is initialized to
3045 the contents of the base image, which may not be zeroes. */
3046 if (bs->backing) {
3047 return 0;
3049 if (bs->drv->bdrv_has_zero_init) {
3050 return bs->drv->bdrv_has_zero_init(bs);
3053 /* safe default */
3054 return 0;
3057 bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs)
3059 BlockDriverInfo bdi;
3061 if (bs->backing) {
3062 return false;
3065 if (bdrv_get_info(bs, &bdi) == 0) {
3066 return bdi.unallocated_blocks_are_zero;
3069 return false;
3072 bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs)
3074 BlockDriverInfo bdi;
3076 if (bs->backing || !(bs->open_flags & BDRV_O_UNMAP)) {
3077 return false;
3080 if (bdrv_get_info(bs, &bdi) == 0) {
3081 return bdi.can_write_zeroes_with_unmap;
3084 return false;
3087 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
3089 if (bs->backing && bs->backing->bs->encrypted)
3090 return bs->backing_file;
3091 else if (bs->encrypted)
3092 return bs->filename;
3093 else
3094 return NULL;
3097 void bdrv_get_backing_filename(BlockDriverState *bs,
3098 char *filename, int filename_size)
3100 pstrcpy(filename, filename_size, bs->backing_file);
3103 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
3105 BlockDriver *drv = bs->drv;
3106 if (!drv)
3107 return -ENOMEDIUM;
3108 if (!drv->bdrv_get_info)
3109 return -ENOTSUP;
3110 memset(bdi, 0, sizeof(*bdi));
3111 return drv->bdrv_get_info(bs, bdi);
3114 ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs)
3116 BlockDriver *drv = bs->drv;
3117 if (drv && drv->bdrv_get_specific_info) {
3118 return drv->bdrv_get_specific_info(bs);
3120 return NULL;
3123 void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
3125 if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
3126 return;
3129 bs->drv->bdrv_debug_event(bs, event);
3132 int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
3133 const char *tag)
3135 while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
3136 bs = bs->file ? bs->file->bs : NULL;
3139 if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
3140 return bs->drv->bdrv_debug_breakpoint(bs, event, tag);
3143 return -ENOTSUP;
3146 int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
3148 while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
3149 bs = bs->file ? bs->file->bs : NULL;
3152 if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
3153 return bs->drv->bdrv_debug_remove_breakpoint(bs, tag);
3156 return -ENOTSUP;
3159 int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
3161 while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
3162 bs = bs->file ? bs->file->bs : NULL;
3165 if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
3166 return bs->drv->bdrv_debug_resume(bs, tag);
3169 return -ENOTSUP;
3172 bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
3174 while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
3175 bs = bs->file ? bs->file->bs : NULL;
3178 if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
3179 return bs->drv->bdrv_debug_is_suspended(bs, tag);
3182 return false;
3185 int bdrv_is_snapshot(BlockDriverState *bs)
3187 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
3190 /* backing_file can either be relative, or absolute, or a protocol. If it is
3191 * relative, it must be relative to the chain. So, passing in bs->filename
3192 * from a BDS as backing_file should not be done, as that may be relative to
3193 * the CWD rather than the chain. */
3194 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
3195 const char *backing_file)
3197 char *filename_full = NULL;
3198 char *backing_file_full = NULL;
3199 char *filename_tmp = NULL;
3200 int is_protocol = 0;
3201 BlockDriverState *curr_bs = NULL;
3202 BlockDriverState *retval = NULL;
3204 if (!bs || !bs->drv || !backing_file) {
3205 return NULL;
3208 filename_full = g_malloc(PATH_MAX);
3209 backing_file_full = g_malloc(PATH_MAX);
3210 filename_tmp = g_malloc(PATH_MAX);
3212 is_protocol = path_has_protocol(backing_file);
3214 for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
3216 /* If either of the filename paths is actually a protocol, then
3217 * compare unmodified paths; otherwise make paths relative */
3218 if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
3219 if (strcmp(backing_file, curr_bs->backing_file) == 0) {
3220 retval = curr_bs->backing->bs;
3221 break;
3223 } else {
3224 /* If not an absolute filename path, make it relative to the current
3225 * image's filename path */
3226 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3227 backing_file);
3229 /* We are going to compare absolute pathnames */
3230 if (!realpath(filename_tmp, filename_full)) {
3231 continue;
3234 /* We need to make sure the backing filename we are comparing against
3235 * is relative to the current image filename (or absolute) */
3236 path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
3237 curr_bs->backing_file);
3239 if (!realpath(filename_tmp, backing_file_full)) {
3240 continue;
3243 if (strcmp(backing_file_full, filename_full) == 0) {
3244 retval = curr_bs->backing->bs;
3245 break;
3250 g_free(filename_full);
3251 g_free(backing_file_full);
3252 g_free(filename_tmp);
3253 return retval;
3256 int bdrv_get_backing_file_depth(BlockDriverState *bs)
3258 if (!bs->drv) {
3259 return 0;
3262 if (!bs->backing) {
3263 return 0;
3266 return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
3269 void bdrv_init(void)
3271 module_call_init(MODULE_INIT_BLOCK);
3274 void bdrv_init_with_whitelist(void)
3276 use_bdrv_whitelist = 1;
3277 bdrv_init();
3280 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
3282 Error *local_err = NULL;
3283 int ret;
3285 if (!bs->drv) {
3286 return;
3289 if (!(bs->open_flags & BDRV_O_INACTIVE)) {
3290 return;
3292 bs->open_flags &= ~BDRV_O_INACTIVE;
3294 if (bs->drv->bdrv_invalidate_cache) {
3295 bs->drv->bdrv_invalidate_cache(bs, &local_err);
3296 } else if (bs->file) {
3297 bdrv_invalidate_cache(bs->file->bs, &local_err);
3299 if (local_err) {
3300 bs->open_flags |= BDRV_O_INACTIVE;
3301 error_propagate(errp, local_err);
3302 return;
3305 ret = refresh_total_sectors(bs, bs->total_sectors);
3306 if (ret < 0) {
3307 bs->open_flags |= BDRV_O_INACTIVE;
3308 error_setg_errno(errp, -ret, "Could not refresh total sector count");
3309 return;
3313 void bdrv_invalidate_cache_all(Error **errp)
3315 BlockDriverState *bs;
3316 Error *local_err = NULL;
3318 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3319 AioContext *aio_context = bdrv_get_aio_context(bs);
3321 aio_context_acquire(aio_context);
3322 bdrv_invalidate_cache(bs, &local_err);
3323 aio_context_release(aio_context);
3324 if (local_err) {
3325 error_propagate(errp, local_err);
3326 return;
3331 static int bdrv_inactivate(BlockDriverState *bs)
3333 int ret;
3335 if (bs->drv->bdrv_inactivate) {
3336 ret = bs->drv->bdrv_inactivate(bs);
3337 if (ret < 0) {
3338 return ret;
3342 bs->open_flags |= BDRV_O_INACTIVE;
3343 return 0;
3346 int bdrv_inactivate_all(void)
3348 BlockDriverState *bs;
3349 int ret;
3351 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
3352 AioContext *aio_context = bdrv_get_aio_context(bs);
3354 aio_context_acquire(aio_context);
3355 ret = bdrv_inactivate(bs);
3356 aio_context_release(aio_context);
3357 if (ret < 0) {
3358 return ret;
3362 return 0;
3365 /**************************************************************/
3366 /* removable device support */
3369 * Return TRUE if the media is present
3371 bool bdrv_is_inserted(BlockDriverState *bs)
3373 BlockDriver *drv = bs->drv;
3374 BdrvChild *child;
3376 if (!drv) {
3377 return false;
3379 if (drv->bdrv_is_inserted) {
3380 return drv->bdrv_is_inserted(bs);
3382 QLIST_FOREACH(child, &bs->children, next) {
3383 if (!bdrv_is_inserted(child->bs)) {
3384 return false;
3387 return true;
3391 * Return whether the media changed since the last call to this
3392 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3394 int bdrv_media_changed(BlockDriverState *bs)
3396 BlockDriver *drv = bs->drv;
3398 if (drv && drv->bdrv_media_changed) {
3399 return drv->bdrv_media_changed(bs);
3401 return -ENOTSUP;
3405 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3407 void bdrv_eject(BlockDriverState *bs, bool eject_flag)
3409 BlockDriver *drv = bs->drv;
3410 const char *device_name;
3412 if (drv && drv->bdrv_eject) {
3413 drv->bdrv_eject(bs, eject_flag);
3416 device_name = bdrv_get_device_name(bs);
3417 if (device_name[0] != '\0') {
3418 qapi_event_send_device_tray_moved(device_name,
3419 eject_flag, &error_abort);
3424 * Lock or unlock the media (if it is locked, the user won't be able
3425 * to eject it manually).
3427 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3429 BlockDriver *drv = bs->drv;
3431 trace_bdrv_lock_medium(bs, locked);
3433 if (drv && drv->bdrv_lock_medium) {
3434 drv->bdrv_lock_medium(bs, locked);
3438 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
3440 BdrvDirtyBitmap *bm;
3442 assert(name);
3443 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3444 if (bm->name && !strcmp(name, bm->name)) {
3445 return bm;
3448 return NULL;
3451 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
3453 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3454 g_free(bitmap->name);
3455 bitmap->name = NULL;
3458 BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
3459 uint32_t granularity,
3460 const char *name,
3461 Error **errp)
3463 int64_t bitmap_size;
3464 BdrvDirtyBitmap *bitmap;
3465 uint32_t sector_granularity;
3467 assert((granularity & (granularity - 1)) == 0);
3469 if (name && bdrv_find_dirty_bitmap(bs, name)) {
3470 error_setg(errp, "Bitmap already exists: %s", name);
3471 return NULL;
3473 sector_granularity = granularity >> BDRV_SECTOR_BITS;
3474 assert(sector_granularity);
3475 bitmap_size = bdrv_nb_sectors(bs);
3476 if (bitmap_size < 0) {
3477 error_setg_errno(errp, -bitmap_size, "could not get length of device");
3478 errno = -bitmap_size;
3479 return NULL;
3481 bitmap = g_new0(BdrvDirtyBitmap, 1);
3482 bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
3483 bitmap->size = bitmap_size;
3484 bitmap->name = g_strdup(name);
3485 bitmap->disabled = false;
3486 QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
3487 return bitmap;
3490 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
3492 return bitmap->successor;
3495 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
3497 return !(bitmap->disabled || bitmap->successor);
3500 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
3502 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3503 return DIRTY_BITMAP_STATUS_FROZEN;
3504 } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3505 return DIRTY_BITMAP_STATUS_DISABLED;
3506 } else {
3507 return DIRTY_BITMAP_STATUS_ACTIVE;
3512 * Create a successor bitmap destined to replace this bitmap after an operation.
3513 * Requires that the bitmap is not frozen and has no successor.
3515 int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
3516 BdrvDirtyBitmap *bitmap, Error **errp)
3518 uint64_t granularity;
3519 BdrvDirtyBitmap *child;
3521 if (bdrv_dirty_bitmap_frozen(bitmap)) {
3522 error_setg(errp, "Cannot create a successor for a bitmap that is "
3523 "currently frozen");
3524 return -1;
3526 assert(!bitmap->successor);
3528 /* Create an anonymous successor */
3529 granularity = bdrv_dirty_bitmap_granularity(bitmap);
3530 child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
3531 if (!child) {
3532 return -1;
3535 /* Successor will be on or off based on our current state. */
3536 child->disabled = bitmap->disabled;
3538 /* Install the successor and freeze the parent */
3539 bitmap->successor = child;
3540 return 0;
3544 * For a bitmap with a successor, yield our name to the successor,
3545 * delete the old bitmap, and return a handle to the new bitmap.
3547 BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
3548 BdrvDirtyBitmap *bitmap,
3549 Error **errp)
3551 char *name;
3552 BdrvDirtyBitmap *successor = bitmap->successor;
3554 if (successor == NULL) {
3555 error_setg(errp, "Cannot relinquish control if "
3556 "there's no successor present");
3557 return NULL;
3560 name = bitmap->name;
3561 bitmap->name = NULL;
3562 successor->name = name;
3563 bitmap->successor = NULL;
3564 bdrv_release_dirty_bitmap(bs, bitmap);
3566 return successor;
3570 * In cases of failure where we can no longer safely delete the parent,
3571 * we may wish to re-join the parent and child/successor.
3572 * The merged parent will be un-frozen, but not explicitly re-enabled.
3574 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
3575 BdrvDirtyBitmap *parent,
3576 Error **errp)
3578 BdrvDirtyBitmap *successor = parent->successor;
3580 if (!successor) {
3581 error_setg(errp, "Cannot reclaim a successor when none is present");
3582 return NULL;
3585 if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
3586 error_setg(errp, "Merging of parent and successor bitmap failed");
3587 return NULL;
3589 bdrv_release_dirty_bitmap(bs, successor);
3590 parent->successor = NULL;
3592 return parent;
3596 * Truncates _all_ bitmaps attached to a BDS.
3598 static void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
3600 BdrvDirtyBitmap *bitmap;
3601 uint64_t size = bdrv_nb_sectors(bs);
3603 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3604 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3605 hbitmap_truncate(bitmap->bitmap, size);
3606 bitmap->size = size;
3610 static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
3611 BdrvDirtyBitmap *bitmap,
3612 bool only_named)
3614 BdrvDirtyBitmap *bm, *next;
3615 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
3616 if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
3617 assert(!bdrv_dirty_bitmap_frozen(bm));
3618 QLIST_REMOVE(bm, list);
3619 hbitmap_free(bm->bitmap);
3620 g_free(bm->name);
3621 g_free(bm);
3623 if (bitmap) {
3624 return;
3630 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
3632 bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
3636 * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
3637 * There must not be any frozen bitmaps attached.
3639 static void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
3641 bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
3644 void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3646 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3647 bitmap->disabled = true;
3650 void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
3652 assert(!bdrv_dirty_bitmap_frozen(bitmap));
3653 bitmap->disabled = false;
3656 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
3658 BdrvDirtyBitmap *bm;
3659 BlockDirtyInfoList *list = NULL;
3660 BlockDirtyInfoList **plist = &list;
3662 QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
3663 BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
3664 BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
3665 info->count = bdrv_get_dirty_count(bm);
3666 info->granularity = bdrv_dirty_bitmap_granularity(bm);
3667 info->has_name = !!bm->name;
3668 info->name = g_strdup(bm->name);
3669 info->status = bdrv_dirty_bitmap_status(bm);
3670 entry->value = info;
3671 *plist = entry;
3672 plist = &entry->next;
3675 return list;
3678 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
3680 if (bitmap) {
3681 return hbitmap_get(bitmap->bitmap, sector);
3682 } else {
3683 return 0;
3688 * Chooses a default granularity based on the existing cluster size,
3689 * but clamped between [4K, 64K]. Defaults to 64K in the case that there
3690 * is no cluster size information available.
3692 uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
3694 BlockDriverInfo bdi;
3695 uint32_t granularity;
3697 if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
3698 granularity = MAX(4096, bdi.cluster_size);
3699 granularity = MIN(65536, granularity);
3700 } else {
3701 granularity = 65536;
3704 return granularity;
3707 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
3709 return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
3712 void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
3714 hbitmap_iter_init(hbi, bitmap->bitmap, 0);
3717 void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3718 int64_t cur_sector, int nr_sectors)
3720 assert(bdrv_dirty_bitmap_enabled(bitmap));
3721 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3724 void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
3725 int64_t cur_sector, int nr_sectors)
3727 assert(bdrv_dirty_bitmap_enabled(bitmap));
3728 hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
3731 void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
3733 assert(bdrv_dirty_bitmap_enabled(bitmap));
3734 if (!out) {
3735 hbitmap_reset_all(bitmap->bitmap);
3736 } else {
3737 HBitmap *backup = bitmap->bitmap;
3738 bitmap->bitmap = hbitmap_alloc(bitmap->size,
3739 hbitmap_granularity(backup));
3740 *out = backup;
3744 void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
3746 HBitmap *tmp = bitmap->bitmap;
3747 assert(bdrv_dirty_bitmap_enabled(bitmap));
3748 bitmap->bitmap = in;
3749 hbitmap_free(tmp);
3752 void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
3753 int nr_sectors)
3755 BdrvDirtyBitmap *bitmap;
3756 QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
3757 if (!bdrv_dirty_bitmap_enabled(bitmap)) {
3758 continue;
3760 hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
3765 * Advance an HBitmapIter to an arbitrary offset.
3767 void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
3769 assert(hbi->hb);
3770 hbitmap_iter_init(hbi, hbi->hb, offset);
3773 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
3775 return hbitmap_count(bitmap->bitmap);
3778 /* Get a reference to bs */
3779 void bdrv_ref(BlockDriverState *bs)
3781 bs->refcnt++;
3784 /* Release a previously grabbed reference to bs.
3785 * If after releasing, reference count is zero, the BlockDriverState is
3786 * deleted. */
3787 void bdrv_unref(BlockDriverState *bs)
3789 if (!bs) {
3790 return;
3792 assert(bs->refcnt > 0);
3793 if (--bs->refcnt == 0) {
3794 bdrv_delete(bs);
3798 struct BdrvOpBlocker {
3799 Error *reason;
3800 QLIST_ENTRY(BdrvOpBlocker) list;
3803 bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp)
3805 BdrvOpBlocker *blocker;
3806 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3807 if (!QLIST_EMPTY(&bs->op_blockers[op])) {
3808 blocker = QLIST_FIRST(&bs->op_blockers[op]);
3809 if (errp) {
3810 *errp = error_copy(blocker->reason);
3811 error_prepend(errp, "Node '%s' is busy: ",
3812 bdrv_get_device_or_node_name(bs));
3814 return true;
3816 return false;
3819 void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason)
3821 BdrvOpBlocker *blocker;
3822 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3824 blocker = g_new0(BdrvOpBlocker, 1);
3825 blocker->reason = reason;
3826 QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list);
3829 void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason)
3831 BdrvOpBlocker *blocker, *next;
3832 assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX);
3833 QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) {
3834 if (blocker->reason == reason) {
3835 QLIST_REMOVE(blocker, list);
3836 g_free(blocker);
3841 void bdrv_op_block_all(BlockDriverState *bs, Error *reason)
3843 int i;
3844 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3845 bdrv_op_block(bs, i, reason);
3849 void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason)
3851 int i;
3852 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3853 bdrv_op_unblock(bs, i, reason);
3857 bool bdrv_op_blocker_is_empty(BlockDriverState *bs)
3859 int i;
3861 for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
3862 if (!QLIST_EMPTY(&bs->op_blockers[i])) {
3863 return false;
3866 return true;
3869 void bdrv_img_create(const char *filename, const char *fmt,
3870 const char *base_filename, const char *base_fmt,
3871 char *options, uint64_t img_size, int flags,
3872 Error **errp, bool quiet)
3874 QemuOptsList *create_opts = NULL;
3875 QemuOpts *opts = NULL;
3876 const char *backing_fmt, *backing_file;
3877 int64_t size;
3878 BlockDriver *drv, *proto_drv;
3879 Error *local_err = NULL;
3880 int ret = 0;
3882 /* Find driver and parse its options */
3883 drv = bdrv_find_format(fmt);
3884 if (!drv) {
3885 error_setg(errp, "Unknown file format '%s'", fmt);
3886 return;
3889 proto_drv = bdrv_find_protocol(filename, true, errp);
3890 if (!proto_drv) {
3891 return;
3894 if (!drv->create_opts) {
3895 error_setg(errp, "Format driver '%s' does not support image creation",
3896 drv->format_name);
3897 return;
3900 if (!proto_drv->create_opts) {
3901 error_setg(errp, "Protocol driver '%s' does not support image creation",
3902 proto_drv->format_name);
3903 return;
3906 create_opts = qemu_opts_append(create_opts, drv->create_opts);
3907 create_opts = qemu_opts_append(create_opts, proto_drv->create_opts);
3909 /* Create parameter list with default values */
3910 opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
3911 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
3913 /* Parse -o options */
3914 if (options) {
3915 qemu_opts_do_parse(opts, options, NULL, &local_err);
3916 if (local_err) {
3917 error_report_err(local_err);
3918 local_err = NULL;
3919 error_setg(errp, "Invalid options for file format '%s'", fmt);
3920 goto out;
3924 if (base_filename) {
3925 qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
3926 if (local_err) {
3927 error_setg(errp, "Backing file not supported for file format '%s'",
3928 fmt);
3929 goto out;
3933 if (base_fmt) {
3934 qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
3935 if (local_err) {
3936 error_setg(errp, "Backing file format not supported for file "
3937 "format '%s'", fmt);
3938 goto out;
3942 backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
3943 if (backing_file) {
3944 if (!strcmp(filename, backing_file)) {
3945 error_setg(errp, "Error: Trying to create an image with the "
3946 "same filename as the backing file");
3947 goto out;
3951 backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
3953 // The size for the image must always be specified, with one exception:
3954 // If we are using a backing file, we can obtain the size from there
3955 size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
3956 if (size == -1) {
3957 if (backing_file) {
3958 BlockDriverState *bs;
3959 char *full_backing = g_new0(char, PATH_MAX);
3960 int64_t size;
3961 int back_flags;
3962 QDict *backing_options = NULL;
3964 bdrv_get_full_backing_filename_from_filename(filename, backing_file,
3965 full_backing, PATH_MAX,
3966 &local_err);
3967 if (local_err) {
3968 g_free(full_backing);
3969 goto out;
3972 /* backing files always opened read-only */
3973 back_flags =
3974 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
3976 if (backing_fmt) {
3977 backing_options = qdict_new();
3978 qdict_put(backing_options, "driver",
3979 qstring_from_str(backing_fmt));
3982 bs = NULL;
3983 ret = bdrv_open(&bs, full_backing, NULL, backing_options,
3984 back_flags, &local_err);
3985 g_free(full_backing);
3986 if (ret < 0) {
3987 goto out;
3989 size = bdrv_getlength(bs);
3990 if (size < 0) {
3991 error_setg_errno(errp, -size, "Could not get size of '%s'",
3992 backing_file);
3993 bdrv_unref(bs);
3994 goto out;
3997 qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
3999 bdrv_unref(bs);
4000 } else {
4001 error_setg(errp, "Image creation needs a size parameter");
4002 goto out;
4006 if (!quiet) {
4007 printf("Formatting '%s', fmt=%s ", filename, fmt);
4008 qemu_opts_print(opts, " ");
4009 puts("");
4012 ret = bdrv_create(drv, filename, opts, &local_err);
4014 if (ret == -EFBIG) {
4015 /* This is generally a better message than whatever the driver would
4016 * deliver (especially because of the cluster_size_hint), since that
4017 * is most probably not much different from "image too large". */
4018 const char *cluster_size_hint = "";
4019 if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) {
4020 cluster_size_hint = " (try using a larger cluster size)";
4022 error_setg(errp, "The image size is too large for file format '%s'"
4023 "%s", fmt, cluster_size_hint);
4024 error_free(local_err);
4025 local_err = NULL;
4028 out:
4029 qemu_opts_del(opts);
4030 qemu_opts_free(create_opts);
4031 if (local_err) {
4032 error_propagate(errp, local_err);
4036 AioContext *bdrv_get_aio_context(BlockDriverState *bs)
4038 return bs->aio_context;
4041 void bdrv_detach_aio_context(BlockDriverState *bs)
4043 BdrvAioNotifier *baf;
4045 if (!bs->drv) {
4046 return;
4049 QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
4050 baf->detach_aio_context(baf->opaque);
4053 if (bs->throttle_state) {
4054 throttle_timers_detach_aio_context(&bs->throttle_timers);
4056 if (bs->drv->bdrv_detach_aio_context) {
4057 bs->drv->bdrv_detach_aio_context(bs);
4059 if (bs->file) {
4060 bdrv_detach_aio_context(bs->file->bs);
4062 if (bs->backing) {
4063 bdrv_detach_aio_context(bs->backing->bs);
4066 bs->aio_context = NULL;
4069 void bdrv_attach_aio_context(BlockDriverState *bs,
4070 AioContext *new_context)
4072 BdrvAioNotifier *ban;
4074 if (!bs->drv) {
4075 return;
4078 bs->aio_context = new_context;
4080 if (bs->backing) {
4081 bdrv_attach_aio_context(bs->backing->bs, new_context);
4083 if (bs->file) {
4084 bdrv_attach_aio_context(bs->file->bs, new_context);
4086 if (bs->drv->bdrv_attach_aio_context) {
4087 bs->drv->bdrv_attach_aio_context(bs, new_context);
4089 if (bs->throttle_state) {
4090 throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
4093 QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
4094 ban->attached_aio_context(new_context, ban->opaque);
4098 void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
4100 bdrv_drain(bs); /* ensure there are no in-flight requests */
4102 bdrv_detach_aio_context(bs);
4104 /* This function executes in the old AioContext so acquire the new one in
4105 * case it runs in a different thread.
4107 aio_context_acquire(new_context);
4108 bdrv_attach_aio_context(bs, new_context);
4109 aio_context_release(new_context);
4112 void bdrv_add_aio_context_notifier(BlockDriverState *bs,
4113 void (*attached_aio_context)(AioContext *new_context, void *opaque),
4114 void (*detach_aio_context)(void *opaque), void *opaque)
4116 BdrvAioNotifier *ban = g_new(BdrvAioNotifier, 1);
4117 *ban = (BdrvAioNotifier){
4118 .attached_aio_context = attached_aio_context,
4119 .detach_aio_context = detach_aio_context,
4120 .opaque = opaque
4123 QLIST_INSERT_HEAD(&bs->aio_notifiers, ban, list);
4126 void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
4127 void (*attached_aio_context)(AioContext *,
4128 void *),
4129 void (*detach_aio_context)(void *),
4130 void *opaque)
4132 BdrvAioNotifier *ban, *ban_next;
4134 QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
4135 if (ban->attached_aio_context == attached_aio_context &&
4136 ban->detach_aio_context == detach_aio_context &&
4137 ban->opaque == opaque)
4139 QLIST_REMOVE(ban, list);
4140 g_free(ban);
4142 return;
4146 abort();
4149 int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
4150 BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
4152 if (!bs->drv->bdrv_amend_options) {
4153 return -ENOTSUP;
4155 return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
4158 /* This function will be called by the bdrv_recurse_is_first_non_filter method
4159 * of block filter and by bdrv_is_first_non_filter.
4160 * It is used to test if the given bs is the candidate or recurse more in the
4161 * node graph.
4163 bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
4164 BlockDriverState *candidate)
4166 /* return false if basic checks fails */
4167 if (!bs || !bs->drv) {
4168 return false;
4171 /* the code reached a non block filter driver -> check if the bs is
4172 * the same as the candidate. It's the recursion termination condition.
4174 if (!bs->drv->is_filter) {
4175 return bs == candidate;
4177 /* Down this path the driver is a block filter driver */
4179 /* If the block filter recursion method is defined use it to recurse down
4180 * the node graph.
4182 if (bs->drv->bdrv_recurse_is_first_non_filter) {
4183 return bs->drv->bdrv_recurse_is_first_non_filter(bs, candidate);
4186 /* the driver is a block filter but don't allow to recurse -> return false
4188 return false;
4191 /* This function checks if the candidate is the first non filter bs down it's
4192 * bs chain. Since we don't have pointers to parents it explore all bs chains
4193 * from the top. Some filters can choose not to pass down the recursion.
4195 bool bdrv_is_first_non_filter(BlockDriverState *candidate)
4197 BlockDriverState *bs;
4199 /* walk down the bs forest recursively */
4200 QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
4201 bool perm;
4203 /* try to recurse in this top level bs */
4204 perm = bdrv_recurse_is_first_non_filter(bs, candidate);
4206 /* candidate is the first non filter */
4207 if (perm) {
4208 return true;
4212 return false;
4215 BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
4216 const char *node_name, Error **errp)
4218 BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
4219 AioContext *aio_context;
4221 if (!to_replace_bs) {
4222 error_setg(errp, "Node name '%s' not found", node_name);
4223 return NULL;
4226 aio_context = bdrv_get_aio_context(to_replace_bs);
4227 aio_context_acquire(aio_context);
4229 if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) {
4230 to_replace_bs = NULL;
4231 goto out;
4234 /* We don't want arbitrary node of the BDS chain to be replaced only the top
4235 * most non filter in order to prevent data corruption.
4236 * Another benefit is that this tests exclude backing files which are
4237 * blocked by the backing blockers.
4239 if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
4240 error_setg(errp, "Only top most non filter can be replaced");
4241 to_replace_bs = NULL;
4242 goto out;
4245 out:
4246 aio_context_release(aio_context);
4247 return to_replace_bs;
4250 static bool append_open_options(QDict *d, BlockDriverState *bs)
4252 const QDictEntry *entry;
4253 QemuOptDesc *desc;
4254 BdrvChild *child;
4255 bool found_any = false;
4256 const char *p;
4258 for (entry = qdict_first(bs->options); entry;
4259 entry = qdict_next(bs->options, entry))
4261 /* Exclude options for children */
4262 QLIST_FOREACH(child, &bs->children, next) {
4263 if (strstart(qdict_entry_key(entry), child->name, &p)
4264 && (!*p || *p == '.'))
4266 break;
4269 if (child) {
4270 continue;
4273 /* And exclude all non-driver-specific options */
4274 for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
4275 if (!strcmp(qdict_entry_key(entry), desc->name)) {
4276 break;
4279 if (desc->name) {
4280 continue;
4283 qobject_incref(qdict_entry_value(entry));
4284 qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
4285 found_any = true;
4288 return found_any;
4291 /* Updates the following BDS fields:
4292 * - exact_filename: A filename which may be used for opening a block device
4293 * which (mostly) equals the given BDS (even without any
4294 * other options; so reading and writing must return the same
4295 * results, but caching etc. may be different)
4296 * - full_open_options: Options which, when given when opening a block device
4297 * (without a filename), result in a BDS (mostly)
4298 * equalling the given one
4299 * - filename: If exact_filename is set, it is copied here. Otherwise,
4300 * full_open_options is converted to a JSON object, prefixed with
4301 * "json:" (for use through the JSON pseudo protocol) and put here.
4303 void bdrv_refresh_filename(BlockDriverState *bs)
4305 BlockDriver *drv = bs->drv;
4306 QDict *opts;
4308 if (!drv) {
4309 return;
4312 /* This BDS's file name will most probably depend on its file's name, so
4313 * refresh that first */
4314 if (bs->file) {
4315 bdrv_refresh_filename(bs->file->bs);
4318 if (drv->bdrv_refresh_filename) {
4319 /* Obsolete information is of no use here, so drop the old file name
4320 * information before refreshing it */
4321 bs->exact_filename[0] = '\0';
4322 if (bs->full_open_options) {
4323 QDECREF(bs->full_open_options);
4324 bs->full_open_options = NULL;
4327 opts = qdict_new();
4328 append_open_options(opts, bs);
4329 drv->bdrv_refresh_filename(bs, opts);
4330 QDECREF(opts);
4331 } else if (bs->file) {
4332 /* Try to reconstruct valid information from the underlying file */
4333 bool has_open_options;
4335 bs->exact_filename[0] = '\0';
4336 if (bs->full_open_options) {
4337 QDECREF(bs->full_open_options);
4338 bs->full_open_options = NULL;
4341 opts = qdict_new();
4342 has_open_options = append_open_options(opts, bs);
4344 /* If no specific options have been given for this BDS, the filename of
4345 * the underlying file should suffice for this one as well */
4346 if (bs->file->bs->exact_filename[0] && !has_open_options) {
4347 strcpy(bs->exact_filename, bs->file->bs->exact_filename);
4349 /* Reconstructing the full options QDict is simple for most format block
4350 * drivers, as long as the full options are known for the underlying
4351 * file BDS. The full options QDict of that file BDS should somehow
4352 * contain a representation of the filename, therefore the following
4353 * suffices without querying the (exact_)filename of this BDS. */
4354 if (bs->file->bs->full_open_options) {
4355 qdict_put_obj(opts, "driver",
4356 QOBJECT(qstring_from_str(drv->format_name)));
4357 QINCREF(bs->file->bs->full_open_options);
4358 qdict_put_obj(opts, "file",
4359 QOBJECT(bs->file->bs->full_open_options));
4361 bs->full_open_options = opts;
4362 } else {
4363 QDECREF(opts);
4365 } else if (!bs->full_open_options && qdict_size(bs->options)) {
4366 /* There is no underlying file BDS (at least referenced by BDS.file),
4367 * so the full options QDict should be equal to the options given
4368 * specifically for this block device when it was opened (plus the
4369 * driver specification).
4370 * Because those options don't change, there is no need to update
4371 * full_open_options when it's already set. */
4373 opts = qdict_new();
4374 append_open_options(opts, bs);
4375 qdict_put_obj(opts, "driver",
4376 QOBJECT(qstring_from_str(drv->format_name)));
4378 if (bs->exact_filename[0]) {
4379 /* This may not work for all block protocol drivers (some may
4380 * require this filename to be parsed), but we have to find some
4381 * default solution here, so just include it. If some block driver
4382 * does not support pure options without any filename at all or
4383 * needs some special format of the options QDict, it needs to
4384 * implement the driver-specific bdrv_refresh_filename() function.
4386 qdict_put_obj(opts, "filename",
4387 QOBJECT(qstring_from_str(bs->exact_filename)));
4390 bs->full_open_options = opts;
4393 if (bs->exact_filename[0]) {
4394 pstrcpy(bs->filename, sizeof(bs->filename), bs->exact_filename);
4395 } else if (bs->full_open_options) {
4396 QString *json = qobject_to_json(QOBJECT(bs->full_open_options));
4397 snprintf(bs->filename, sizeof(bs->filename), "json:%s",
4398 qstring_get_str(json));
4399 QDECREF(json);