qcow: Fix bdrv_write_compressed error handling
[qemu/ar7.git] / block.c
blobd5ec0beaf01cf79b05a874c67f3f06dffd42870f
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
47 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
49 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
50 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52 BlockDriverCompletionFunc *cb, void *opaque);
53 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
55 BlockDriverCompletionFunc *cb, void *opaque);
56 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
57 int64_t sector_num, int nb_sectors,
58 QEMUIOVector *iov);
59 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
60 int64_t sector_num, int nb_sectors,
61 QEMUIOVector *iov);
62 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
63 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
64 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
66 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
67 int64_t sector_num,
68 QEMUIOVector *qiov,
69 int nb_sectors,
70 BlockDriverCompletionFunc *cb,
71 void *opaque,
72 bool is_write);
73 static void coroutine_fn bdrv_co_do_rw(void *opaque);
75 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
76 QTAILQ_HEAD_INITIALIZER(bdrv_states);
78 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
79 QLIST_HEAD_INITIALIZER(bdrv_drivers);
81 /* The device to use for VM snapshots */
82 static BlockDriverState *bs_snapshots;
84 /* If non-zero, use only whitelisted block drivers */
85 static int use_bdrv_whitelist;
87 #ifdef _WIN32
/*
 * Return 1 if filename begins with an ASCII letter followed by ':'
 * (e.g. "c:"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char first = filename[0];
    const int is_lower = (first >= 'a' && first <= 'z');
    const int is_upper = (first >= 'A' && first <= 'Z');

    /* Only look at the second character once the first one is a letter. */
    if (!is_lower && !is_upper) {
        return 0;
    }
    return filename[1] == ':';
}
/*
 * Return 1 if filename is exactly a drive specification ("x:") or starts
 * with one of the device prefixes "\\.\" or "//./", 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
105 #endif
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    /* A drive letter or device path contains ':' but is not a protocol. */
    if (is_windows_drive(path) ||
        is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL;
}
/*
 * Return non-zero if path is absolute.  Everything up to and including the
 * first ':' (if any) is skipped before the leading separator is checked.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
/* If filename is absolute, it is simply copied to dest.  Otherwise the
   directory part of base_path (everything up to the last path separator,
   or up to the "<protocol>:" prefix when that comes later) is copied to
   dest and filename is appended to it.  URLs are supported.
   Nothing is written when dest_size <= 0. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto;
    const char *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the protocol prefix, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');

        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix is longer */
    if (after_sep > after_proto) {
        after_proto = after_sep;
    }

    prefix_len = after_proto - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
184 void bdrv_register(BlockDriver *bdrv)
186 /* Block drivers without coroutine functions need emulation */
187 if (!bdrv->bdrv_co_readv) {
188 bdrv->bdrv_co_readv = bdrv_co_readv_em;
189 bdrv->bdrv_co_writev = bdrv_co_writev_em;
191 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
192 * the block driver lacks aio we need to emulate that too.
194 if (!bdrv->bdrv_aio_readv) {
195 /* add AIO emulation layer */
196 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
197 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
201 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
204 /* create a new block device (by default it is empty) */
205 BlockDriverState *bdrv_new(const char *device_name)
207 BlockDriverState *bs;
209 bs = g_malloc0(sizeof(BlockDriverState));
210 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
211 if (device_name[0] != '\0') {
212 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
214 bdrv_iostatus_disable(bs);
215 return bs;
218 BlockDriver *bdrv_find_format(const char *format_name)
220 BlockDriver *drv1;
221 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
222 if (!strcmp(drv1->format_name, format_name)) {
223 return drv1;
226 return NULL;
229 static int bdrv_is_whitelisted(BlockDriver *drv)
231 static const char *whitelist[] = {
232 CONFIG_BDRV_WHITELIST
234 const char **p;
236 if (!whitelist[0])
237 return 1; /* no whitelist, anything goes */
239 for (p = whitelist; *p; p++) {
240 if (!strcmp(drv->format_name, *p)) {
241 return 1;
244 return 0;
247 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
249 BlockDriver *drv = bdrv_find_format(format_name);
250 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
253 int bdrv_create(BlockDriver *drv, const char* filename,
254 QEMUOptionParameter *options)
256 if (!drv->bdrv_create)
257 return -ENOTSUP;
259 return drv->bdrv_create(filename, options);
262 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
264 BlockDriver *drv;
266 drv = bdrv_find_protocol(filename);
267 if (drv == NULL) {
268 return -ENOENT;
271 return bdrv_create(drv, filename, options);
#ifdef _WIN32
/*
 * Store the name of a temporary file in filename.
 * NOTE(review): 'size' is not used by this variant; GetTempFileName()
 * presumably expects a buffer of at least MAX_PATH characters - confirm
 * against the callers.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
/*
 * Store the name of a temporary file in filename (at most 'size' bytes,
 * including the terminating NUL).  The file is created below $TMPDIR, or
 * below /tmp when TMPDIR is not set, and closed again right away.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;
    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; do not pass that to close() */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
298 * Detect host devices. By convention, /dev/cdrom[N] is always
299 * recognized as a host CDROM.
301 static BlockDriver *find_hdev_driver(const char *filename)
303 int score_max = 0, score;
304 BlockDriver *drv = NULL, *d;
306 QLIST_FOREACH(d, &bdrv_drivers, list) {
307 if (d->bdrv_probe_device) {
308 score = d->bdrv_probe_device(filename);
309 if (score > score_max) {
310 score_max = score;
311 drv = d;
316 return drv;
319 BlockDriver *bdrv_find_protocol(const char *filename)
321 BlockDriver *drv1;
322 char protocol[128];
323 int len;
324 const char *p;
326 /* TODO Drivers without bdrv_file_open must be specified explicitly */
329 * XXX(hch): we really should not let host device detection
330 * override an explicit protocol specification, but moving this
331 * later breaks access to device names with colons in them.
332 * Thanks to the brain-dead persistent naming schemes on udev-
333 * based Linux systems those actually are quite common.
335 drv1 = find_hdev_driver(filename);
336 if (drv1) {
337 return drv1;
340 if (!path_has_protocol(filename)) {
341 return bdrv_find_format("file");
343 p = strchr(filename, ':');
344 assert(p != NULL);
345 len = p - filename;
346 if (len > sizeof(protocol) - 1)
347 len = sizeof(protocol) - 1;
348 memcpy(protocol, filename, len);
349 protocol[len] = '\0';
350 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
351 if (drv1->protocol_name &&
352 !strcmp(drv1->protocol_name, protocol)) {
353 return drv1;
356 return NULL;
359 static int find_image_format(const char *filename, BlockDriver **pdrv)
361 int ret, score, score_max;
362 BlockDriver *drv1, *drv;
363 uint8_t buf[2048];
364 BlockDriverState *bs;
366 ret = bdrv_file_open(&bs, filename, 0);
367 if (ret < 0) {
368 *pdrv = NULL;
369 return ret;
372 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
373 if (bs->sg || !bdrv_is_inserted(bs)) {
374 bdrv_delete(bs);
375 drv = bdrv_find_format("raw");
376 if (!drv) {
377 ret = -ENOENT;
379 *pdrv = drv;
380 return ret;
383 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
384 bdrv_delete(bs);
385 if (ret < 0) {
386 *pdrv = NULL;
387 return ret;
390 score_max = 0;
391 drv = NULL;
392 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
393 if (drv1->bdrv_probe) {
394 score = drv1->bdrv_probe(buf, ret, filename);
395 if (score > score_max) {
396 score_max = score;
397 drv = drv1;
401 if (!drv) {
402 ret = -ENOENT;
404 *pdrv = drv;
405 return ret;
409 * Set the current 'total_sectors' value
411 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
413 BlockDriver *drv = bs->drv;
415 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
416 if (bs->sg)
417 return 0;
419 /* query actual device if possible, otherwise just trust the hint */
420 if (drv->bdrv_getlength) {
421 int64_t length = drv->bdrv_getlength(bs);
422 if (length < 0) {
423 return length;
425 hint = length >> BDRV_SECTOR_BITS;
428 bs->total_sectors = hint;
429 return 0;
433 * Set open flags for a given cache mode
435 * Return 0 on success, -1 if the cache mode was invalid.
437 int bdrv_parse_cache_flags(const char *mode, int *flags)
439 *flags &= ~BDRV_O_CACHE_MASK;
441 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
442 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
443 } else if (!strcmp(mode, "directsync")) {
444 *flags |= BDRV_O_NOCACHE;
445 } else if (!strcmp(mode, "writeback")) {
446 *flags |= BDRV_O_CACHE_WB;
447 } else if (!strcmp(mode, "unsafe")) {
448 *flags |= BDRV_O_CACHE_WB;
449 *flags |= BDRV_O_NO_FLUSH;
450 } else if (!strcmp(mode, "writethrough")) {
451 /* this is the default */
452 } else {
453 return -1;
456 return 0;
460 * Common part for opening disk images and files
462 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
463 int flags, BlockDriver *drv)
465 int ret, open_flags;
467 assert(drv != NULL);
469 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
471 bs->file = NULL;
472 bs->total_sectors = 0;
473 bs->encrypted = 0;
474 bs->valid_key = 0;
475 bs->open_flags = flags;
476 bs->buffer_alignment = 512;
478 pstrcpy(bs->filename, sizeof(bs->filename), filename);
480 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
481 return -ENOTSUP;
484 bs->drv = drv;
485 bs->opaque = g_malloc0(drv->instance_size);
487 if (flags & BDRV_O_CACHE_WB)
488 bs->enable_write_cache = 1;
491 * Clear flags that are internal to the block layer before opening the
492 * image.
494 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
497 * Snapshots should be writable.
499 if (bs->is_temporary) {
500 open_flags |= BDRV_O_RDWR;
503 /* Open the image, either directly or using a protocol */
504 if (drv->bdrv_file_open) {
505 ret = drv->bdrv_file_open(bs, filename, open_flags);
506 } else {
507 ret = bdrv_file_open(&bs->file, filename, open_flags);
508 if (ret >= 0) {
509 ret = drv->bdrv_open(bs, open_flags);
513 if (ret < 0) {
514 goto free_and_fail;
517 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
519 ret = refresh_total_sectors(bs, bs->total_sectors);
520 if (ret < 0) {
521 goto free_and_fail;
524 #ifndef _WIN32
525 if (bs->is_temporary) {
526 unlink(filename);
528 #endif
529 return 0;
531 free_and_fail:
532 if (bs->file) {
533 bdrv_delete(bs->file);
534 bs->file = NULL;
536 g_free(bs->opaque);
537 bs->opaque = NULL;
538 bs->drv = NULL;
539 return ret;
543 * Opens a file using a protocol (file, host_device, nbd, ...)
545 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
547 BlockDriverState *bs;
548 BlockDriver *drv;
549 int ret;
551 drv = bdrv_find_protocol(filename);
552 if (!drv) {
553 return -ENOENT;
556 bs = bdrv_new("");
557 ret = bdrv_open_common(bs, filename, flags, drv);
558 if (ret < 0) {
559 bdrv_delete(bs);
560 return ret;
562 bs->growable = 1;
563 *pbs = bs;
564 return 0;
568 * Opens a disk image (raw, qcow2, vmdk, ...)
570 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
571 BlockDriver *drv)
573 int ret;
574 char tmp_filename[PATH_MAX];
576 if (flags & BDRV_O_SNAPSHOT) {
577 BlockDriverState *bs1;
578 int64_t total_size;
579 int is_protocol = 0;
580 BlockDriver *bdrv_qcow2;
581 QEMUOptionParameter *options;
582 char backing_filename[PATH_MAX];
584 /* if snapshot, we create a temporary backing file and open it
585 instead of opening 'filename' directly */
587 /* if there is a backing file, use it */
588 bs1 = bdrv_new("");
589 ret = bdrv_open(bs1, filename, 0, drv);
590 if (ret < 0) {
591 bdrv_delete(bs1);
592 return ret;
594 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
596 if (bs1->drv && bs1->drv->protocol_name)
597 is_protocol = 1;
599 bdrv_delete(bs1);
601 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
603 /* Real path is meaningless for protocols */
604 if (is_protocol)
605 snprintf(backing_filename, sizeof(backing_filename),
606 "%s", filename);
607 else if (!realpath(filename, backing_filename))
608 return -errno;
610 bdrv_qcow2 = bdrv_find_format("qcow2");
611 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
613 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
614 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
615 if (drv) {
616 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
617 drv->format_name);
620 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
621 free_option_parameters(options);
622 if (ret < 0) {
623 return ret;
626 filename = tmp_filename;
627 drv = bdrv_qcow2;
628 bs->is_temporary = 1;
631 /* Find the right image format driver */
632 if (!drv) {
633 ret = find_image_format(filename, &drv);
636 if (!drv) {
637 goto unlink_and_fail;
640 /* Open the image */
641 ret = bdrv_open_common(bs, filename, flags, drv);
642 if (ret < 0) {
643 goto unlink_and_fail;
646 /* If there is a backing file, use it */
647 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
648 char backing_filename[PATH_MAX];
649 int back_flags;
650 BlockDriver *back_drv = NULL;
652 bs->backing_hd = bdrv_new("");
654 if (path_has_protocol(bs->backing_file)) {
655 pstrcpy(backing_filename, sizeof(backing_filename),
656 bs->backing_file);
657 } else {
658 path_combine(backing_filename, sizeof(backing_filename),
659 filename, bs->backing_file);
662 if (bs->backing_format[0] != '\0') {
663 back_drv = bdrv_find_format(bs->backing_format);
666 /* backing files always opened read-only */
667 back_flags =
668 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
670 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
671 if (ret < 0) {
672 bdrv_close(bs);
673 return ret;
675 if (bs->is_temporary) {
676 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
677 } else {
678 /* base image inherits from "parent" */
679 bs->backing_hd->keep_read_only = bs->keep_read_only;
683 if (!bdrv_key_required(bs)) {
684 bdrv_dev_change_media_cb(bs, true);
687 return 0;
689 unlink_and_fail:
690 if (bs->is_temporary) {
691 unlink(filename);
693 return ret;
696 void bdrv_close(BlockDriverState *bs)
698 if (bs->drv) {
699 if (bs == bs_snapshots) {
700 bs_snapshots = NULL;
702 if (bs->backing_hd) {
703 bdrv_delete(bs->backing_hd);
704 bs->backing_hd = NULL;
706 bs->drv->bdrv_close(bs);
707 g_free(bs->opaque);
708 #ifdef _WIN32
709 if (bs->is_temporary) {
710 unlink(bs->filename);
712 #endif
713 bs->opaque = NULL;
714 bs->drv = NULL;
716 if (bs->file != NULL) {
717 bdrv_close(bs->file);
720 bdrv_dev_change_media_cb(bs, false);
724 void bdrv_close_all(void)
726 BlockDriverState *bs;
728 QTAILQ_FOREACH(bs, &bdrv_states, list) {
729 bdrv_close(bs);
733 /* make a BlockDriverState anonymous by removing from bdrv_state list.
734 Also, NULL terminate the device_name to prevent double remove */
735 void bdrv_make_anon(BlockDriverState *bs)
737 if (bs->device_name[0] != '\0') {
738 QTAILQ_REMOVE(&bdrv_states, bs, list);
740 bs->device_name[0] = '\0';
743 void bdrv_delete(BlockDriverState *bs)
745 assert(!bs->dev);
747 /* remove from list, if necessary */
748 bdrv_make_anon(bs);
750 bdrv_close(bs);
751 if (bs->file != NULL) {
752 bdrv_delete(bs->file);
755 assert(bs != bs_snapshots);
756 g_free(bs);
759 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
760 /* TODO change to DeviceState *dev when all users are qdevified */
762 if (bs->dev) {
763 return -EBUSY;
765 bs->dev = dev;
766 bdrv_iostatus_reset(bs);
767 return 0;
770 /* TODO qdevified devices don't use this, remove when devices are qdevified */
771 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
773 if (bdrv_attach_dev(bs, dev) < 0) {
774 abort();
778 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
779 /* TODO change to DeviceState *dev when all users are qdevified */
781 assert(bs->dev == dev);
782 bs->dev = NULL;
783 bs->dev_ops = NULL;
784 bs->dev_opaque = NULL;
785 bs->buffer_alignment = 512;
788 /* TODO change to return DeviceState * when all users are qdevified */
789 void *bdrv_get_attached_dev(BlockDriverState *bs)
791 return bs->dev;
794 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
795 void *opaque)
797 bs->dev_ops = ops;
798 bs->dev_opaque = opaque;
799 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
800 bs_snapshots = NULL;
804 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
806 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
807 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
811 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
813 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
816 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
818 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
819 return bs->dev_ops->is_tray_open(bs->dev_opaque);
821 return false;
824 static void bdrv_dev_resize_cb(BlockDriverState *bs)
826 if (bs->dev_ops && bs->dev_ops->resize_cb) {
827 bs->dev_ops->resize_cb(bs->dev_opaque);
831 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
833 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
834 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
836 return false;
840 * Run consistency checks on an image
842 * Returns 0 if the check could be completed (it doesn't mean that the image is
843 * free of errors) or -errno when an internal error occurred. The results of the
844 * check are stored in res.
846 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
848 if (bs->drv->bdrv_check == NULL) {
849 return -ENOTSUP;
852 memset(res, 0, sizeof(*res));
853 return bs->drv->bdrv_check(bs, res);
856 #define COMMIT_BUF_SECTORS 2048
858 /* commit COW file into the raw image */
859 int bdrv_commit(BlockDriverState *bs)
861 BlockDriver *drv = bs->drv;
862 BlockDriver *backing_drv;
863 int64_t sector, total_sectors;
864 int n, ro, open_flags;
865 int ret = 0, rw_ret = 0;
866 uint8_t *buf;
867 char filename[1024];
868 BlockDriverState *bs_rw, *bs_ro;
870 if (!drv)
871 return -ENOMEDIUM;
873 if (!bs->backing_hd) {
874 return -ENOTSUP;
877 if (bs->backing_hd->keep_read_only) {
878 return -EACCES;
881 backing_drv = bs->backing_hd->drv;
882 ro = bs->backing_hd->read_only;
883 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
884 open_flags = bs->backing_hd->open_flags;
886 if (ro) {
887 /* re-open as RW */
888 bdrv_delete(bs->backing_hd);
889 bs->backing_hd = NULL;
890 bs_rw = bdrv_new("");
891 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
892 backing_drv);
893 if (rw_ret < 0) {
894 bdrv_delete(bs_rw);
895 /* try to re-open read-only */
896 bs_ro = bdrv_new("");
897 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
898 backing_drv);
899 if (ret < 0) {
900 bdrv_delete(bs_ro);
901 /* drive not functional anymore */
902 bs->drv = NULL;
903 return ret;
905 bs->backing_hd = bs_ro;
906 return rw_ret;
908 bs->backing_hd = bs_rw;
911 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
912 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
914 for (sector = 0; sector < total_sectors; sector += n) {
915 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
917 if (bdrv_read(bs, sector, buf, n) != 0) {
918 ret = -EIO;
919 goto ro_cleanup;
922 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
923 ret = -EIO;
924 goto ro_cleanup;
929 if (drv->bdrv_make_empty) {
930 ret = drv->bdrv_make_empty(bs);
931 bdrv_flush(bs);
935 * Make sure all data we wrote to the backing device is actually
936 * stable on disk.
938 if (bs->backing_hd)
939 bdrv_flush(bs->backing_hd);
941 ro_cleanup:
942 g_free(buf);
944 if (ro) {
945 /* re-open as RO */
946 bdrv_delete(bs->backing_hd);
947 bs->backing_hd = NULL;
948 bs_ro = bdrv_new("");
949 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
950 backing_drv);
951 if (ret < 0) {
952 bdrv_delete(bs_ro);
953 /* drive not functional anymore */
954 bs->drv = NULL;
955 return ret;
957 bs->backing_hd = bs_ro;
958 bs->backing_hd->keep_read_only = 0;
961 return ret;
964 void bdrv_commit_all(void)
966 BlockDriverState *bs;
968 QTAILQ_FOREACH(bs, &bdrv_states, list) {
969 bdrv_commit(bs);
974 * Return values:
975 * 0 - success
976 * -EINVAL - backing format specified, but no file
977 * -ENOSPC - can't update the backing file because no space is left in the
978 * image file header
979 * -ENOTSUP - format driver doesn't support changing the backing file
981 int bdrv_change_backing_file(BlockDriverState *bs,
982 const char *backing_file, const char *backing_fmt)
984 BlockDriver *drv = bs->drv;
986 if (drv->bdrv_change_backing_file != NULL) {
987 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
988 } else {
989 return -ENOTSUP;
993 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
994 size_t size)
996 int64_t len;
998 if (!bdrv_is_inserted(bs))
999 return -ENOMEDIUM;
1001 if (bs->growable)
1002 return 0;
1004 len = bdrv_getlength(bs);
1006 if (offset < 0)
1007 return -EIO;
1009 if ((offset > len) || (len - offset < size))
1010 return -EIO;
1012 return 0;
1015 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1016 int nb_sectors)
1018 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1019 nb_sectors * BDRV_SECTOR_SIZE);
1022 typedef struct RwCo {
1023 BlockDriverState *bs;
1024 int64_t sector_num;
1025 int nb_sectors;
1026 QEMUIOVector *qiov;
1027 bool is_write;
1028 int ret;
1029 } RwCo;
1031 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1033 RwCo *rwco = opaque;
1035 if (!rwco->is_write) {
1036 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1037 rwco->nb_sectors, rwco->qiov);
1038 } else {
1039 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1040 rwco->nb_sectors, rwco->qiov);
1045 * Process a synchronous request using coroutines
1047 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1048 int nb_sectors, bool is_write)
1050 QEMUIOVector qiov;
1051 struct iovec iov = {
1052 .iov_base = (void *)buf,
1053 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1055 Coroutine *co;
1056 RwCo rwco = {
1057 .bs = bs,
1058 .sector_num = sector_num,
1059 .nb_sectors = nb_sectors,
1060 .qiov = &qiov,
1061 .is_write = is_write,
1062 .ret = NOT_DONE,
1065 qemu_iovec_init_external(&qiov, &iov, 1);
1067 if (qemu_in_coroutine()) {
1068 /* Fast-path if already in coroutine context */
1069 bdrv_rw_co_entry(&rwco);
1070 } else {
1071 co = qemu_coroutine_create(bdrv_rw_co_entry);
1072 qemu_coroutine_enter(co, &rwco);
1073 while (rwco.ret == NOT_DONE) {
1074 qemu_aio_wait();
1077 return rwco.ret;
1080 /* return < 0 if error. See bdrv_write() for the return codes */
1081 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1082 uint8_t *buf, int nb_sectors)
1084 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1087 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1088 int nb_sectors, int dirty)
1090 int64_t start, end;
1091 unsigned long val, idx, bit;
1093 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1094 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1096 for (; start <= end; start++) {
1097 idx = start / (sizeof(unsigned long) * 8);
1098 bit = start % (sizeof(unsigned long) * 8);
1099 val = bs->dirty_bitmap[idx];
1100 if (dirty) {
1101 if (!(val & (1UL << bit))) {
1102 bs->dirty_count++;
1103 val |= 1UL << bit;
1105 } else {
1106 if (val & (1UL << bit)) {
1107 bs->dirty_count--;
1108 val &= ~(1UL << bit);
1111 bs->dirty_bitmap[idx] = val;
1115 /* Return < 0 if error. Important errors are:
1116 -EIO generic I/O error (may happen for all errors)
1117 -ENOMEDIUM No media inserted.
1118 -EINVAL Invalid sector number or nb_sectors
1119 -EACCES Trying to write a read-only device
1121 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1122 const uint8_t *buf, int nb_sectors)
1124 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1127 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1128 void *buf, int count1)
1130 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1131 int len, nb_sectors, count;
1132 int64_t sector_num;
1133 int ret;
1135 count = count1;
1136 /* first read to align to sector start */
1137 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1138 if (len > count)
1139 len = count;
1140 sector_num = offset >> BDRV_SECTOR_BITS;
1141 if (len > 0) {
1142 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1143 return ret;
1144 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1145 count -= len;
1146 if (count == 0)
1147 return count1;
1148 sector_num++;
1149 buf += len;
1152 /* read the sectors "in place" */
1153 nb_sectors = count >> BDRV_SECTOR_BITS;
1154 if (nb_sectors > 0) {
1155 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1156 return ret;
1157 sector_num += nb_sectors;
1158 len = nb_sectors << BDRV_SECTOR_BITS;
1159 buf += len;
1160 count -= len;
1163 /* add data from the last sector */
1164 if (count > 0) {
1165 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1166 return ret;
1167 memcpy(buf, tmp_buf, count);
1169 return count1;
1172 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1173 const void *buf, int count1)
1175 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1176 int len, nb_sectors, count;
1177 int64_t sector_num;
1178 int ret;
1180 count = count1;
1181 /* first write to align to sector start */
1182 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1183 if (len > count)
1184 len = count;
1185 sector_num = offset >> BDRV_SECTOR_BITS;
1186 if (len > 0) {
1187 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1188 return ret;
1189 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1190 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1191 return ret;
1192 count -= len;
1193 if (count == 0)
1194 return count1;
1195 sector_num++;
1196 buf += len;
1199 /* write the sectors "in place" */
1200 nb_sectors = count >> BDRV_SECTOR_BITS;
1201 if (nb_sectors > 0) {
1202 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1203 return ret;
1204 sector_num += nb_sectors;
1205 len = nb_sectors << BDRV_SECTOR_BITS;
1206 buf += len;
1207 count -= len;
1210 /* add data from the last sector */
1211 if (count > 0) {
1212 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1213 return ret;
1214 memcpy(tmp_buf, buf, count);
1215 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1216 return ret;
1218 return count1;
1222 * Writes to the file and ensures that no writes are reordered across this
1223 * request (acts as a barrier)
1225 * Returns 0 on success, -errno in error cases.
1227 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1228 const void *buf, int count)
1230 int ret;
1232 ret = bdrv_pwrite(bs, offset, buf, count);
1233 if (ret < 0) {
1234 return ret;
1237 /* No flush needed for cache modes that use O_DSYNC */
1238 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1239 bdrv_flush(bs);
1242 return 0;
1246 * Handle a read request in coroutine context
1248 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1249 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1251 BlockDriver *drv = bs->drv;
1253 if (!drv) {
1254 return -ENOMEDIUM;
1256 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1257 return -EIO;
1260 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1263 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1264 int nb_sectors, QEMUIOVector *qiov)
1266 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1268 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1272 * Handle a write request in coroutine context
1274 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1275 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1277 BlockDriver *drv = bs->drv;
1278 int ret;
1280 if (!bs->drv) {
1281 return -ENOMEDIUM;
1283 if (bs->read_only) {
1284 return -EACCES;
1286 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1287 return -EIO;
1290 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1292 if (bs->dirty_bitmap) {
1293 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1296 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1297 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1300 return ret;
1303 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1304 int nb_sectors, QEMUIOVector *qiov)
1306 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1308 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1312 * Truncate file to 'offset' bytes (needed only for file protocols)
1314 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1316 BlockDriver *drv = bs->drv;
1317 int ret;
1318 if (!drv)
1319 return -ENOMEDIUM;
1320 if (!drv->bdrv_truncate)
1321 return -ENOTSUP;
1322 if (bs->read_only)
1323 return -EACCES;
1324 if (bdrv_in_use(bs))
1325 return -EBUSY;
1326 ret = drv->bdrv_truncate(bs, offset);
1327 if (ret == 0) {
1328 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1329 bdrv_dev_resize_cb(bs);
1331 return ret;
1335 * Length of a allocated file in bytes. Sparse files are counted by actual
1336 * allocated space. Return < 0 if error or unknown.
1338 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1340 BlockDriver *drv = bs->drv;
1341 if (!drv) {
1342 return -ENOMEDIUM;
1344 if (drv->bdrv_get_allocated_file_size) {
1345 return drv->bdrv_get_allocated_file_size(bs);
1347 if (bs->file) {
1348 return bdrv_get_allocated_file_size(bs->file);
1350 return -ENOTSUP;
1354 * Length of a file in bytes. Return < 0 if error or unknown.
1356 int64_t bdrv_getlength(BlockDriverState *bs)
1358 BlockDriver *drv = bs->drv;
1359 if (!drv)
1360 return -ENOMEDIUM;
1362 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1363 if (drv->bdrv_getlength) {
1364 return drv->bdrv_getlength(bs);
1367 return bs->total_sectors * BDRV_SECTOR_SIZE;
1370 /* return 0 as number of sectors if no device present or error */
1371 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1373 int64_t length;
1374 length = bdrv_getlength(bs);
1375 if (length < 0)
1376 length = 0;
1377 else
1378 length = length >> BDRV_SECTOR_BITS;
1379 *nb_sectors_ptr = length;
1382 struct partition {
1383 uint8_t boot_ind; /* 0x80 - active */
1384 uint8_t head; /* starting head */
1385 uint8_t sector; /* starting sector */
1386 uint8_t cyl; /* starting cylinder */
1387 uint8_t sys_ind; /* What partition type */
1388 uint8_t end_head; /* end head */
1389 uint8_t end_sector; /* end sector */
1390 uint8_t end_cyl; /* end cylinder */
1391 uint32_t start_sect; /* starting sector counting from 0 */
1392 uint32_t nr_sects; /* nr of sectors in partition */
1393 } QEMU_PACKED;
1395 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1396 static int guess_disk_lchs(BlockDriverState *bs,
1397 int *pcylinders, int *pheads, int *psectors)
1399 uint8_t buf[BDRV_SECTOR_SIZE];
1400 int ret, i, heads, sectors, cylinders;
1401 struct partition *p;
1402 uint32_t nr_sects;
1403 uint64_t nb_sectors;
1405 bdrv_get_geometry(bs, &nb_sectors);
1407 ret = bdrv_read(bs, 0, buf, 1);
1408 if (ret < 0)
1409 return -1;
1410 /* test msdos magic */
1411 if (buf[510] != 0x55 || buf[511] != 0xaa)
1412 return -1;
1413 for(i = 0; i < 4; i++) {
1414 p = ((struct partition *)(buf + 0x1be)) + i;
1415 nr_sects = le32_to_cpu(p->nr_sects);
1416 if (nr_sects && p->end_head) {
1417 /* We make the assumption that the partition terminates on
1418 a cylinder boundary */
1419 heads = p->end_head + 1;
1420 sectors = p->end_sector & 63;
1421 if (sectors == 0)
1422 continue;
1423 cylinders = nb_sectors / (heads * sectors);
1424 if (cylinders < 1 || cylinders > 16383)
1425 continue;
1426 *pheads = heads;
1427 *psectors = sectors;
1428 *pcylinders = cylinders;
1429 #if 0
1430 printf("guessed geometry: LCHS=%d %d %d\n",
1431 cylinders, heads, sectors);
1432 #endif
1433 return 0;
1436 return -1;
1439 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1441 int translation, lba_detected = 0;
1442 int cylinders, heads, secs;
1443 uint64_t nb_sectors;
1445 /* if a geometry hint is available, use it */
1446 bdrv_get_geometry(bs, &nb_sectors);
1447 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1448 translation = bdrv_get_translation_hint(bs);
1449 if (cylinders != 0) {
1450 *pcyls = cylinders;
1451 *pheads = heads;
1452 *psecs = secs;
1453 } else {
1454 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1455 if (heads > 16) {
1456 /* if heads > 16, it means that a BIOS LBA
1457 translation was active, so the default
1458 hardware geometry is OK */
1459 lba_detected = 1;
1460 goto default_geometry;
1461 } else {
1462 *pcyls = cylinders;
1463 *pheads = heads;
1464 *psecs = secs;
1465 /* disable any translation to be in sync with
1466 the logical geometry */
1467 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1468 bdrv_set_translation_hint(bs,
1469 BIOS_ATA_TRANSLATION_NONE);
1472 } else {
1473 default_geometry:
1474 /* if no geometry, use a standard physical disk geometry */
1475 cylinders = nb_sectors / (16 * 63);
1477 if (cylinders > 16383)
1478 cylinders = 16383;
1479 else if (cylinders < 2)
1480 cylinders = 2;
1481 *pcyls = cylinders;
1482 *pheads = 16;
1483 *psecs = 63;
1484 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1485 if ((*pcyls * *pheads) <= 131072) {
1486 bdrv_set_translation_hint(bs,
1487 BIOS_ATA_TRANSLATION_LARGE);
1488 } else {
1489 bdrv_set_translation_hint(bs,
1490 BIOS_ATA_TRANSLATION_LBA);
1494 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1498 void bdrv_set_geometry_hint(BlockDriverState *bs,
1499 int cyls, int heads, int secs)
1501 bs->cyls = cyls;
1502 bs->heads = heads;
1503 bs->secs = secs;
1506 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1508 bs->translation = translation;
1511 void bdrv_get_geometry_hint(BlockDriverState *bs,
1512 int *pcyls, int *pheads, int *psecs)
1514 *pcyls = bs->cyls;
1515 *pheads = bs->heads;
1516 *psecs = bs->secs;
1519 /* Recognize floppy formats */
1520 typedef struct FDFormat {
1521 FDriveType drive;
1522 uint8_t last_sect;
1523 uint8_t max_track;
1524 uint8_t max_head;
1525 } FDFormat;
1527 static const FDFormat fd_formats[] = {
1528 /* First entry is default format */
1529 /* 1.44 MB 3"1/2 floppy disks */
1530 { FDRIVE_DRV_144, 18, 80, 1, },
1531 { FDRIVE_DRV_144, 20, 80, 1, },
1532 { FDRIVE_DRV_144, 21, 80, 1, },
1533 { FDRIVE_DRV_144, 21, 82, 1, },
1534 { FDRIVE_DRV_144, 21, 83, 1, },
1535 { FDRIVE_DRV_144, 22, 80, 1, },
1536 { FDRIVE_DRV_144, 23, 80, 1, },
1537 { FDRIVE_DRV_144, 24, 80, 1, },
1538 /* 2.88 MB 3"1/2 floppy disks */
1539 { FDRIVE_DRV_288, 36, 80, 1, },
1540 { FDRIVE_DRV_288, 39, 80, 1, },
1541 { FDRIVE_DRV_288, 40, 80, 1, },
1542 { FDRIVE_DRV_288, 44, 80, 1, },
1543 { FDRIVE_DRV_288, 48, 80, 1, },
1544 /* 720 kB 3"1/2 floppy disks */
1545 { FDRIVE_DRV_144, 9, 80, 1, },
1546 { FDRIVE_DRV_144, 10, 80, 1, },
1547 { FDRIVE_DRV_144, 10, 82, 1, },
1548 { FDRIVE_DRV_144, 10, 83, 1, },
1549 { FDRIVE_DRV_144, 13, 80, 1, },
1550 { FDRIVE_DRV_144, 14, 80, 1, },
1551 /* 1.2 MB 5"1/4 floppy disks */
1552 { FDRIVE_DRV_120, 15, 80, 1, },
1553 { FDRIVE_DRV_120, 18, 80, 1, },
1554 { FDRIVE_DRV_120, 18, 82, 1, },
1555 { FDRIVE_DRV_120, 18, 83, 1, },
1556 { FDRIVE_DRV_120, 20, 80, 1, },
1557 /* 720 kB 5"1/4 floppy disks */
1558 { FDRIVE_DRV_120, 9, 80, 1, },
1559 { FDRIVE_DRV_120, 11, 80, 1, },
1560 /* 360 kB 5"1/4 floppy disks */
1561 { FDRIVE_DRV_120, 9, 40, 1, },
1562 { FDRIVE_DRV_120, 9, 40, 0, },
1563 { FDRIVE_DRV_120, 10, 41, 1, },
1564 { FDRIVE_DRV_120, 10, 42, 1, },
1565 /* 320 kB 5"1/4 floppy disks */
1566 { FDRIVE_DRV_120, 8, 40, 1, },
1567 { FDRIVE_DRV_120, 8, 40, 0, },
1568 /* 360 kB must match 5"1/4 better than 3"1/2... */
1569 { FDRIVE_DRV_144, 9, 80, 0, },
1570 /* end */
1571 { FDRIVE_DRV_NONE, -1, -1, 0, },
1574 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1575 int *max_track, int *last_sect,
1576 FDriveType drive_in, FDriveType *drive)
1578 const FDFormat *parse;
1579 uint64_t nb_sectors, size;
1580 int i, first_match, match;
1582 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1583 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1584 /* User defined disk */
1585 } else {
1586 bdrv_get_geometry(bs, &nb_sectors);
1587 match = -1;
1588 first_match = -1;
1589 for (i = 0; ; i++) {
1590 parse = &fd_formats[i];
1591 if (parse->drive == FDRIVE_DRV_NONE) {
1592 break;
1594 if (drive_in == parse->drive ||
1595 drive_in == FDRIVE_DRV_NONE) {
1596 size = (parse->max_head + 1) * parse->max_track *
1597 parse->last_sect;
1598 if (nb_sectors == size) {
1599 match = i;
1600 break;
1602 if (first_match == -1) {
1603 first_match = i;
1607 if (match == -1) {
1608 if (first_match == -1) {
1609 match = 1;
1610 } else {
1611 match = first_match;
1613 parse = &fd_formats[match];
1615 *nb_heads = parse->max_head + 1;
1616 *max_track = parse->max_track;
1617 *last_sect = parse->last_sect;
1618 *drive = parse->drive;
1622 int bdrv_get_translation_hint(BlockDriverState *bs)
1624 return bs->translation;
1627 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1628 BlockErrorAction on_write_error)
1630 bs->on_read_error = on_read_error;
1631 bs->on_write_error = on_write_error;
1634 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1636 return is_read ? bs->on_read_error : bs->on_write_error;
1639 int bdrv_is_read_only(BlockDriverState *bs)
1641 return bs->read_only;
1644 int bdrv_is_sg(BlockDriverState *bs)
1646 return bs->sg;
1649 int bdrv_enable_write_cache(BlockDriverState *bs)
1651 return bs->enable_write_cache;
1654 int bdrv_is_encrypted(BlockDriverState *bs)
1656 if (bs->backing_hd && bs->backing_hd->encrypted)
1657 return 1;
1658 return bs->encrypted;
1661 int bdrv_key_required(BlockDriverState *bs)
1663 BlockDriverState *backing_hd = bs->backing_hd;
1665 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1666 return 1;
1667 return (bs->encrypted && !bs->valid_key);
1670 int bdrv_set_key(BlockDriverState *bs, const char *key)
1672 int ret;
1673 if (bs->backing_hd && bs->backing_hd->encrypted) {
1674 ret = bdrv_set_key(bs->backing_hd, key);
1675 if (ret < 0)
1676 return ret;
1677 if (!bs->encrypted)
1678 return 0;
1680 if (!bs->encrypted) {
1681 return -EINVAL;
1682 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1683 return -ENOMEDIUM;
1685 ret = bs->drv->bdrv_set_key(bs, key);
1686 if (ret < 0) {
1687 bs->valid_key = 0;
1688 } else if (!bs->valid_key) {
1689 bs->valid_key = 1;
1690 /* call the change callback now, we skipped it on open */
1691 bdrv_dev_change_media_cb(bs, true);
1693 return ret;
1696 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1698 if (!bs->drv) {
1699 buf[0] = '\0';
1700 } else {
1701 pstrcpy(buf, buf_size, bs->drv->format_name);
1705 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1706 void *opaque)
1708 BlockDriver *drv;
1710 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1711 it(opaque, drv->format_name);
1715 BlockDriverState *bdrv_find(const char *name)
1717 BlockDriverState *bs;
1719 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1720 if (!strcmp(name, bs->device_name)) {
1721 return bs;
1724 return NULL;
1727 BlockDriverState *bdrv_next(BlockDriverState *bs)
1729 if (!bs) {
1730 return QTAILQ_FIRST(&bdrv_states);
1732 return QTAILQ_NEXT(bs, list);
1735 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1737 BlockDriverState *bs;
1739 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1740 it(opaque, bs);
1744 const char *bdrv_get_device_name(BlockDriverState *bs)
1746 return bs->device_name;
1749 void bdrv_flush_all(void)
1751 BlockDriverState *bs;
1753 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1754 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1755 bdrv_flush(bs);
1760 int bdrv_has_zero_init(BlockDriverState *bs)
1762 assert(bs->drv);
1764 if (bs->drv->bdrv_has_zero_init) {
1765 return bs->drv->bdrv_has_zero_init(bs);
1768 return 1;
1772 * Returns true iff the specified sector is present in the disk image. Drivers
1773 * not implementing the functionality are assumed to not support backing files,
1774 * hence all their sectors are reported as allocated.
1776 * 'pnum' is set to the number of sectors (including and immediately following
1777 * the specified sector) that are known to be in the same
1778 * allocated/unallocated state.
1780 * 'nb_sectors' is the max value 'pnum' should be set to.
1782 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1783 int *pnum)
1785 int64_t n;
1786 if (!bs->drv->bdrv_is_allocated) {
1787 if (sector_num >= bs->total_sectors) {
1788 *pnum = 0;
1789 return 0;
1791 n = bs->total_sectors - sector_num;
1792 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1793 return 1;
1795 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1798 void bdrv_mon_event(const BlockDriverState *bdrv,
1799 BlockMonEventAction action, int is_read)
1801 QObject *data;
1802 const char *action_str;
1804 switch (action) {
1805 case BDRV_ACTION_REPORT:
1806 action_str = "report";
1807 break;
1808 case BDRV_ACTION_IGNORE:
1809 action_str = "ignore";
1810 break;
1811 case BDRV_ACTION_STOP:
1812 action_str = "stop";
1813 break;
1814 default:
1815 abort();
1818 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1819 bdrv->device_name,
1820 action_str,
1821 is_read ? "read" : "write");
1822 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1824 qobject_decref(data);
1827 static void bdrv_print_dict(QObject *obj, void *opaque)
1829 QDict *bs_dict;
1830 Monitor *mon = opaque;
1832 bs_dict = qobject_to_qdict(obj);
1834 monitor_printf(mon, "%s: removable=%d",
1835 qdict_get_str(bs_dict, "device"),
1836 qdict_get_bool(bs_dict, "removable"));
1838 if (qdict_get_bool(bs_dict, "removable")) {
1839 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1840 monitor_printf(mon, " tray-open=%d",
1841 qdict_get_bool(bs_dict, "tray-open"));
1844 if (qdict_haskey(bs_dict, "io-status")) {
1845 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1848 if (qdict_haskey(bs_dict, "inserted")) {
1849 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1851 monitor_printf(mon, " file=");
1852 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1853 if (qdict_haskey(qdict, "backing_file")) {
1854 monitor_printf(mon, " backing_file=");
1855 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1857 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1858 qdict_get_bool(qdict, "ro"),
1859 qdict_get_str(qdict, "drv"),
1860 qdict_get_bool(qdict, "encrypted"));
1861 } else {
1862 monitor_printf(mon, " [not inserted]");
1865 monitor_printf(mon, "\n");
1868 void bdrv_info_print(Monitor *mon, const QObject *data)
1870 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1873 static const char *const io_status_name[BDRV_IOS_MAX] = {
1874 [BDRV_IOS_OK] = "ok",
1875 [BDRV_IOS_FAILED] = "failed",
1876 [BDRV_IOS_ENOSPC] = "nospace",
1879 void bdrv_info(Monitor *mon, QObject **ret_data)
1881 QList *bs_list;
1882 BlockDriverState *bs;
1884 bs_list = qlist_new();
1886 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1887 QObject *bs_obj;
1888 QDict *bs_dict;
1890 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1891 "'removable': %i, 'locked': %i }",
1892 bs->device_name,
1893 bdrv_dev_has_removable_media(bs),
1894 bdrv_dev_is_medium_locked(bs));
1895 bs_dict = qobject_to_qdict(bs_obj);
1897 if (bdrv_dev_has_removable_media(bs)) {
1898 qdict_put(bs_dict, "tray-open",
1899 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1902 if (bdrv_iostatus_is_enabled(bs)) {
1903 qdict_put(bs_dict, "io-status",
1904 qstring_from_str(io_status_name[bs->iostatus]));
1907 if (bs->drv) {
1908 QObject *obj;
1910 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1911 "'encrypted': %i }",
1912 bs->filename, bs->read_only,
1913 bs->drv->format_name,
1914 bdrv_is_encrypted(bs));
1915 if (bs->backing_file[0] != '\0') {
1916 QDict *qdict = qobject_to_qdict(obj);
1917 qdict_put(qdict, "backing_file",
1918 qstring_from_str(bs->backing_file));
1921 qdict_put_obj(bs_dict, "inserted", obj);
1923 qlist_append_obj(bs_list, bs_obj);
1926 *ret_data = QOBJECT(bs_list);
1929 static void bdrv_stats_iter(QObject *data, void *opaque)
1931 QDict *qdict;
1932 Monitor *mon = opaque;
1934 qdict = qobject_to_qdict(data);
1935 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1937 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1938 monitor_printf(mon, " rd_bytes=%" PRId64
1939 " wr_bytes=%" PRId64
1940 " rd_operations=%" PRId64
1941 " wr_operations=%" PRId64
1942 " flush_operations=%" PRId64
1943 " wr_total_time_ns=%" PRId64
1944 " rd_total_time_ns=%" PRId64
1945 " flush_total_time_ns=%" PRId64
1946 "\n",
1947 qdict_get_int(qdict, "rd_bytes"),
1948 qdict_get_int(qdict, "wr_bytes"),
1949 qdict_get_int(qdict, "rd_operations"),
1950 qdict_get_int(qdict, "wr_operations"),
1951 qdict_get_int(qdict, "flush_operations"),
1952 qdict_get_int(qdict, "wr_total_time_ns"),
1953 qdict_get_int(qdict, "rd_total_time_ns"),
1954 qdict_get_int(qdict, "flush_total_time_ns"));
1957 void bdrv_stats_print(Monitor *mon, const QObject *data)
1959 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1962 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1964 QObject *res;
1965 QDict *dict;
1967 res = qobject_from_jsonf("{ 'stats': {"
1968 "'rd_bytes': %" PRId64 ","
1969 "'wr_bytes': %" PRId64 ","
1970 "'rd_operations': %" PRId64 ","
1971 "'wr_operations': %" PRId64 ","
1972 "'wr_highest_offset': %" PRId64 ","
1973 "'flush_operations': %" PRId64 ","
1974 "'wr_total_time_ns': %" PRId64 ","
1975 "'rd_total_time_ns': %" PRId64 ","
1976 "'flush_total_time_ns': %" PRId64
1977 "} }",
1978 bs->nr_bytes[BDRV_ACCT_READ],
1979 bs->nr_bytes[BDRV_ACCT_WRITE],
1980 bs->nr_ops[BDRV_ACCT_READ],
1981 bs->nr_ops[BDRV_ACCT_WRITE],
1982 bs->wr_highest_sector *
1983 (uint64_t)BDRV_SECTOR_SIZE,
1984 bs->nr_ops[BDRV_ACCT_FLUSH],
1985 bs->total_time_ns[BDRV_ACCT_WRITE],
1986 bs->total_time_ns[BDRV_ACCT_READ],
1987 bs->total_time_ns[BDRV_ACCT_FLUSH]);
1988 dict = qobject_to_qdict(res);
1990 if (*bs->device_name) {
1991 qdict_put(dict, "device", qstring_from_str(bs->device_name));
1994 if (bs->file) {
1995 QObject *parent = bdrv_info_stats_bs(bs->file);
1996 qdict_put_obj(dict, "parent", parent);
1999 return res;
2002 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2004 QObject *obj;
2005 QList *devices;
2006 BlockDriverState *bs;
2008 devices = qlist_new();
2010 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2011 obj = bdrv_info_stats_bs(bs);
2012 qlist_append_obj(devices, obj);
2015 *ret_data = QOBJECT(devices);
2018 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2020 if (bs->backing_hd && bs->backing_hd->encrypted)
2021 return bs->backing_file;
2022 else if (bs->encrypted)
2023 return bs->filename;
2024 else
2025 return NULL;
2028 void bdrv_get_backing_filename(BlockDriverState *bs,
2029 char *filename, int filename_size)
2031 pstrcpy(filename, filename_size, bs->backing_file);
2034 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2035 const uint8_t *buf, int nb_sectors)
2037 BlockDriver *drv = bs->drv;
2038 if (!drv)
2039 return -ENOMEDIUM;
2040 if (!drv->bdrv_write_compressed)
2041 return -ENOTSUP;
2042 if (bdrv_check_request(bs, sector_num, nb_sectors))
2043 return -EIO;
2045 if (bs->dirty_bitmap) {
2046 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2049 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2052 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2054 BlockDriver *drv = bs->drv;
2055 if (!drv)
2056 return -ENOMEDIUM;
2057 if (!drv->bdrv_get_info)
2058 return -ENOTSUP;
2059 memset(bdi, 0, sizeof(*bdi));
2060 return drv->bdrv_get_info(bs, bdi);
2063 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2064 int64_t pos, int size)
2066 BlockDriver *drv = bs->drv;
2067 if (!drv)
2068 return -ENOMEDIUM;
2069 if (drv->bdrv_save_vmstate)
2070 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2071 if (bs->file)
2072 return bdrv_save_vmstate(bs->file, buf, pos, size);
2073 return -ENOTSUP;
2076 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2077 int64_t pos, int size)
2079 BlockDriver *drv = bs->drv;
2080 if (!drv)
2081 return -ENOMEDIUM;
2082 if (drv->bdrv_load_vmstate)
2083 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2084 if (bs->file)
2085 return bdrv_load_vmstate(bs->file, buf, pos, size);
2086 return -ENOTSUP;
2089 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2091 BlockDriver *drv = bs->drv;
2093 if (!drv || !drv->bdrv_debug_event) {
2094 return;
2097 return drv->bdrv_debug_event(bs, event);
2101 /**************************************************************/
2102 /* handling of snapshots */
2104 int bdrv_can_snapshot(BlockDriverState *bs)
2106 BlockDriver *drv = bs->drv;
2107 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2108 return 0;
2111 if (!drv->bdrv_snapshot_create) {
2112 if (bs->file != NULL) {
2113 return bdrv_can_snapshot(bs->file);
2115 return 0;
2118 return 1;
2121 int bdrv_is_snapshot(BlockDriverState *bs)
2123 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2126 BlockDriverState *bdrv_snapshots(void)
2128 BlockDriverState *bs;
2130 if (bs_snapshots) {
2131 return bs_snapshots;
2134 bs = NULL;
2135 while ((bs = bdrv_next(bs))) {
2136 if (bdrv_can_snapshot(bs)) {
2137 bs_snapshots = bs;
2138 return bs;
2141 return NULL;
2144 int bdrv_snapshot_create(BlockDriverState *bs,
2145 QEMUSnapshotInfo *sn_info)
2147 BlockDriver *drv = bs->drv;
2148 if (!drv)
2149 return -ENOMEDIUM;
2150 if (drv->bdrv_snapshot_create)
2151 return drv->bdrv_snapshot_create(bs, sn_info);
2152 if (bs->file)
2153 return bdrv_snapshot_create(bs->file, sn_info);
2154 return -ENOTSUP;
2157 int bdrv_snapshot_goto(BlockDriverState *bs,
2158 const char *snapshot_id)
2160 BlockDriver *drv = bs->drv;
2161 int ret, open_ret;
2163 if (!drv)
2164 return -ENOMEDIUM;
2165 if (drv->bdrv_snapshot_goto)
2166 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2168 if (bs->file) {
2169 drv->bdrv_close(bs);
2170 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2171 open_ret = drv->bdrv_open(bs, bs->open_flags);
2172 if (open_ret < 0) {
2173 bdrv_delete(bs->file);
2174 bs->drv = NULL;
2175 return open_ret;
2177 return ret;
2180 return -ENOTSUP;
2183 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2185 BlockDriver *drv = bs->drv;
2186 if (!drv)
2187 return -ENOMEDIUM;
2188 if (drv->bdrv_snapshot_delete)
2189 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2190 if (bs->file)
2191 return bdrv_snapshot_delete(bs->file, snapshot_id);
2192 return -ENOTSUP;
2195 int bdrv_snapshot_list(BlockDriverState *bs,
2196 QEMUSnapshotInfo **psn_info)
2198 BlockDriver *drv = bs->drv;
2199 if (!drv)
2200 return -ENOMEDIUM;
2201 if (drv->bdrv_snapshot_list)
2202 return drv->bdrv_snapshot_list(bs, psn_info);
2203 if (bs->file)
2204 return bdrv_snapshot_list(bs->file, psn_info);
2205 return -ENOTSUP;
2208 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2209 const char *snapshot_name)
2211 BlockDriver *drv = bs->drv;
2212 if (!drv) {
2213 return -ENOMEDIUM;
2215 if (!bs->read_only) {
2216 return -EINVAL;
2218 if (drv->bdrv_snapshot_load_tmp) {
2219 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2221 return -ENOTSUP;
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (at most 'buf_size' bytes) in a short
 * human-readable form and return 'buf'.
 *
 * Values up to 999 are printed as plain integers.  Larger values are
 * scaled by powers of 1024 and suffixed with K, M, G or T: with one
 * decimal while below ten units, otherwise as a rounded integer.  Values
 * beyond the T range are still expressed in T.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = { 'K', 'M', 'G', 'T' };
    int64_t unit = 1024;
    int idx = 0;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    while (idx < NB_SUFFIXES) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        /* The last suffix absorbs everything that is larger still. */
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
        unit = unit * 1024;
        idx++;
    }

    return buf;
}
2254 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2256 char buf1[128], date_buf[128], clock_buf[128];
2257 #ifdef _WIN32
2258 struct tm *ptm;
2259 #else
2260 struct tm tm;
2261 #endif
2262 time_t ti;
2263 int64_t secs;
2265 if (!sn) {
2266 snprintf(buf, buf_size,
2267 "%-10s%-20s%7s%20s%15s",
2268 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2269 } else {
2270 ti = sn->date_sec;
2271 #ifdef _WIN32
2272 ptm = localtime(&ti);
2273 strftime(date_buf, sizeof(date_buf),
2274 "%Y-%m-%d %H:%M:%S", ptm);
2275 #else
2276 localtime_r(&ti, &tm);
2277 strftime(date_buf, sizeof(date_buf),
2278 "%Y-%m-%d %H:%M:%S", &tm);
2279 #endif
2280 secs = sn->vm_clock_nsec / 1000000000;
2281 snprintf(clock_buf, sizeof(clock_buf),
2282 "%02d:%02d:%02d.%03d",
2283 (int)(secs / 3600),
2284 (int)((secs / 60) % 60),
2285 (int)(secs % 60),
2286 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2287 snprintf(buf, buf_size,
2288 "%-10s%-20s%7s%20s%15s",
2289 sn->id_str, sn->name,
2290 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2291 date_buf,
2292 clock_buf);
2294 return buf;
2297 /**************************************************************/
2298 /* async I/Os */
2300 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2301 QEMUIOVector *qiov, int nb_sectors,
2302 BlockDriverCompletionFunc *cb, void *opaque)
2304 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2306 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2307 cb, opaque, false);
2310 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2311 QEMUIOVector *qiov, int nb_sectors,
2312 BlockDriverCompletionFunc *cb, void *opaque)
2314 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2316 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2317 cb, opaque, true);
2321 typedef struct MultiwriteCB {
2322 int error;
2323 int num_requests;
2324 int num_callbacks;
2325 struct {
2326 BlockDriverCompletionFunc *cb;
2327 void *opaque;
2328 QEMUIOVector *free_qiov;
2329 void *free_buf;
2330 } callbacks[];
2331 } MultiwriteCB;
2333 static void multiwrite_user_cb(MultiwriteCB *mcb)
2335 int i;
2337 for (i = 0; i < mcb->num_callbacks; i++) {
2338 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2339 if (mcb->callbacks[i].free_qiov) {
2340 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2342 g_free(mcb->callbacks[i].free_qiov);
2343 qemu_vfree(mcb->callbacks[i].free_buf);
2347 static void multiwrite_cb(void *opaque, int ret)
2349 MultiwriteCB *mcb = opaque;
2351 trace_multiwrite_cb(mcb, ret);
2353 if (ret < 0 && !mcb->error) {
2354 mcb->error = ret;
2357 mcb->num_requests--;
2358 if (mcb->num_requests == 0) {
2359 multiwrite_user_cb(mcb);
2360 g_free(mcb);
2364 static int multiwrite_req_compare(const void *a, const void *b)
2366 const BlockRequest *req1 = a, *req2 = b;
2369 * Note that we can't simply subtract req2->sector from req1->sector
2370 * here as that could overflow the return value.
2372 if (req1->sector > req2->sector) {
2373 return 1;
2374 } else if (req1->sector < req2->sector) {
2375 return -1;
2376 } else {
2377 return 0;
2382 * Takes a bunch of requests and tries to merge them. Returns the number of
2383 * requests that remain after merging.
2385 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2386 int num_reqs, MultiwriteCB *mcb)
2388 int i, outidx;
2390 // Sort requests by start sector
2391 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2393 // Check if adjacent requests touch the same clusters. If so, combine them,
2394 // filling up gaps with zero sectors.
2395 outidx = 0;
2396 for (i = 1; i < num_reqs; i++) {
2397 int merge = 0;
2398 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2400 // This handles the cases that are valid for all block drivers, namely
2401 // exactly sequential writes and overlapping writes.
2402 if (reqs[i].sector <= oldreq_last) {
2403 merge = 1;
2406 // The block driver may decide that it makes sense to combine requests
2407 // even if there is a gap of some sectors between them. In this case,
2408 // the gap is filled with zeros (therefore only applicable for yet
2409 // unused space in format like qcow2).
2410 if (!merge && bs->drv->bdrv_merge_requests) {
2411 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2414 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2415 merge = 0;
2418 if (merge) {
2419 size_t size;
2420 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2421 qemu_iovec_init(qiov,
2422 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2424 // Add the first request to the merged one. If the requests are
2425 // overlapping, drop the last sectors of the first request.
2426 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2427 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2429 // We might need to add some zeros between the two requests
2430 if (reqs[i].sector > oldreq_last) {
2431 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2432 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2433 memset(buf, 0, zero_bytes);
2434 qemu_iovec_add(qiov, buf, zero_bytes);
2435 mcb->callbacks[i].free_buf = buf;
2438 // Add the second request
2439 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2441 reqs[outidx].nb_sectors = qiov->size >> 9;
2442 reqs[outidx].qiov = qiov;
2444 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2445 } else {
2446 outidx++;
2447 reqs[outidx].sector = reqs[i].sector;
2448 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2449 reqs[outidx].qiov = reqs[i].qiov;
2453 return outidx + 1;
2457 * Submit multiple AIO write requests at once.
2459 * On success, the function returns 0 and all requests in the reqs array have
2460 * been submitted. In error case this function returns -1, and any of the
2461 * requests may or may not be submitted yet. In particular, this means that the
2462 * callback will be called for some of the requests, for others it won't. The
2463 * caller must check the error field of the BlockRequest to wait for the right
2464 * callbacks (if error != 0, no callback will be called).
2466 * The implementation may modify the contents of the reqs array, e.g. to merge
2467 * requests. However, the fields opaque and error are left unmodified as they
2468 * are used to signal failure for a single request to the caller.
2470 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2472 BlockDriverAIOCB *acb;
2473 MultiwriteCB *mcb;
2474 int i;
2476 /* don't submit writes if we don't have a medium */
2477 if (bs->drv == NULL) {
2478 for (i = 0; i < num_reqs; i++) {
2479 reqs[i].error = -ENOMEDIUM;
2481 return -1;
2484 if (num_reqs == 0) {
2485 return 0;
2488 // Create MultiwriteCB structure
2489 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2490 mcb->num_requests = 0;
2491 mcb->num_callbacks = num_reqs;
2493 for (i = 0; i < num_reqs; i++) {
2494 mcb->callbacks[i].cb = reqs[i].cb;
2495 mcb->callbacks[i].opaque = reqs[i].opaque;
2498 // Check for mergable requests
2499 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2501 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2504 * Run the aio requests. As soon as one request can't be submitted
2505 * successfully, fail all requests that are not yet submitted (we must
2506 * return failure for all requests anyway)
2508 * num_requests cannot be set to the right value immediately: If
2509 * bdrv_aio_writev fails for some request, num_requests would be too high
2510 * and therefore multiwrite_cb() would never recognize the multiwrite
2511 * request as completed. We also cannot use the loop variable i to set it
2512 * when the first request fails because the callback may already have been
2513 * called for previously submitted requests. Thus, num_requests must be
2514 * incremented for each request that is submitted.
2516 * The problem that callbacks may be called early also means that we need
2517 * to take care that num_requests doesn't become 0 before all requests are
2518 * submitted - multiwrite_cb() would consider the multiwrite request
2519 * completed. A dummy request that is "completed" by a manual call to
2520 * multiwrite_cb() takes care of this.
2522 mcb->num_requests = 1;
2524 // Run the aio requests
2525 for (i = 0; i < num_reqs; i++) {
2526 mcb->num_requests++;
2527 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2528 reqs[i].nb_sectors, multiwrite_cb, mcb);
2530 if (acb == NULL) {
2531 // We can only fail the whole thing if no request has been
2532 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2533 // complete and report the error in the callback.
2534 if (i == 0) {
2535 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2536 goto fail;
2537 } else {
2538 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2539 multiwrite_cb(mcb, -EIO);
2540 break;
2545 /* Complete the dummy request */
2546 multiwrite_cb(mcb, 0);
2548 return 0;
2550 fail:
2551 for (i = 0; i < mcb->num_callbacks; i++) {
2552 reqs[i].error = -EIO;
2554 g_free(mcb);
2555 return -1;
2558 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2560 acb->pool->cancel(acb);
2564 /**************************************************************/
2565 /* async block device emulation */
2567 typedef struct BlockDriverAIOCBSync {
2568 BlockDriverAIOCB common;
2569 QEMUBH *bh;
2570 int ret;
2571 /* vector translation state */
2572 QEMUIOVector *qiov;
2573 uint8_t *bounce;
2574 int is_write;
2575 } BlockDriverAIOCBSync;
2577 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2579 BlockDriverAIOCBSync *acb =
2580 container_of(blockacb, BlockDriverAIOCBSync, common);
2581 qemu_bh_delete(acb->bh);
2582 acb->bh = NULL;
2583 qemu_aio_release(acb);
2586 static AIOPool bdrv_em_aio_pool = {
2587 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2588 .cancel = bdrv_aio_cancel_em,
2591 static void bdrv_aio_bh_cb(void *opaque)
2593 BlockDriverAIOCBSync *acb = opaque;
2595 if (!acb->is_write)
2596 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2597 qemu_vfree(acb->bounce);
2598 acb->common.cb(acb->common.opaque, acb->ret);
2599 qemu_bh_delete(acb->bh);
2600 acb->bh = NULL;
2601 qemu_aio_release(acb);
2604 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2605 int64_t sector_num,
2606 QEMUIOVector *qiov,
2607 int nb_sectors,
2608 BlockDriverCompletionFunc *cb,
2609 void *opaque,
2610 int is_write)
2613 BlockDriverAIOCBSync *acb;
2615 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2616 acb->is_write = is_write;
2617 acb->qiov = qiov;
2618 acb->bounce = qemu_blockalign(bs, qiov->size);
2620 if (!acb->bh)
2621 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2623 if (is_write) {
2624 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2625 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2626 } else {
2627 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2630 qemu_bh_schedule(acb->bh);
2632 return &acb->common;
2635 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2636 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2637 BlockDriverCompletionFunc *cb, void *opaque)
2639 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2642 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2643 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2644 BlockDriverCompletionFunc *cb, void *opaque)
2646 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2650 typedef struct BlockDriverAIOCBCoroutine {
2651 BlockDriverAIOCB common;
2652 BlockRequest req;
2653 bool is_write;
2654 QEMUBH* bh;
2655 } BlockDriverAIOCBCoroutine;
2657 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2659 qemu_aio_flush();
2662 static AIOPool bdrv_em_co_aio_pool = {
2663 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2664 .cancel = bdrv_aio_co_cancel_em,
2667 static void bdrv_co_em_bh(void *opaque)
2669 BlockDriverAIOCBCoroutine *acb = opaque;
2671 acb->common.cb(acb->common.opaque, acb->req.error);
2672 qemu_bh_delete(acb->bh);
2673 qemu_aio_release(acb);
2676 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2677 static void coroutine_fn bdrv_co_do_rw(void *opaque)
2679 BlockDriverAIOCBCoroutine *acb = opaque;
2680 BlockDriverState *bs = acb->common.bs;
2682 if (!acb->is_write) {
2683 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2684 acb->req.nb_sectors, acb->req.qiov);
2685 } else {
2686 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2687 acb->req.nb_sectors, acb->req.qiov);
2690 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2691 qemu_bh_schedule(acb->bh);
2694 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2695 int64_t sector_num,
2696 QEMUIOVector *qiov,
2697 int nb_sectors,
2698 BlockDriverCompletionFunc *cb,
2699 void *opaque,
2700 bool is_write)
2702 Coroutine *co;
2703 BlockDriverAIOCBCoroutine *acb;
2705 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2706 acb->req.sector = sector_num;
2707 acb->req.nb_sectors = nb_sectors;
2708 acb->req.qiov = qiov;
2709 acb->is_write = is_write;
2711 co = qemu_coroutine_create(bdrv_co_do_rw);
2712 qemu_coroutine_enter(co, acb);
2714 return &acb->common;
2717 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
2719 BlockDriverAIOCBCoroutine *acb = opaque;
2720 BlockDriverState *bs = acb->common.bs;
2722 acb->req.error = bdrv_co_flush(bs);
2723 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2724 qemu_bh_schedule(acb->bh);
2727 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2728 BlockDriverCompletionFunc *cb, void *opaque)
2730 trace_bdrv_aio_flush(bs, opaque);
2732 Coroutine *co;
2733 BlockDriverAIOCBCoroutine *acb;
2735 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2736 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2737 qemu_coroutine_enter(co, acb);
2739 return &acb->common;
2742 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
2744 BlockDriverAIOCBCoroutine *acb = opaque;
2745 BlockDriverState *bs = acb->common.bs;
2747 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2748 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2749 qemu_bh_schedule(acb->bh);
2752 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2753 int64_t sector_num, int nb_sectors,
2754 BlockDriverCompletionFunc *cb, void *opaque)
2756 Coroutine *co;
2757 BlockDriverAIOCBCoroutine *acb;
2759 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2761 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2762 acb->req.sector = sector_num;
2763 acb->req.nb_sectors = nb_sectors;
2764 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2765 qemu_coroutine_enter(co, acb);
2767 return &acb->common;
2770 void bdrv_init(void)
2772 module_call_init(MODULE_INIT_BLOCK);
2775 void bdrv_init_with_whitelist(void)
2777 use_bdrv_whitelist = 1;
2778 bdrv_init();
2781 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2782 BlockDriverCompletionFunc *cb, void *opaque)
2784 BlockDriverAIOCB *acb;
2786 if (pool->free_aiocb) {
2787 acb = pool->free_aiocb;
2788 pool->free_aiocb = acb->next;
2789 } else {
2790 acb = g_malloc0(pool->aiocb_size);
2791 acb->pool = pool;
2793 acb->bs = bs;
2794 acb->cb = cb;
2795 acb->opaque = opaque;
2796 return acb;
2799 void qemu_aio_release(void *p)
2801 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2802 AIOPool *pool = acb->pool;
2803 acb->next = pool->free_aiocb;
2804 pool->free_aiocb = acb;
2807 /**************************************************************/
2808 /* Coroutine block device emulation */
2810 typedef struct CoroutineIOCompletion {
2811 Coroutine *coroutine;
2812 int ret;
2813 } CoroutineIOCompletion;
2815 static void bdrv_co_io_em_complete(void *opaque, int ret)
2817 CoroutineIOCompletion *co = opaque;
2819 co->ret = ret;
2820 qemu_coroutine_enter(co->coroutine, NULL);
2823 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2824 int nb_sectors, QEMUIOVector *iov,
2825 bool is_write)
2827 CoroutineIOCompletion co = {
2828 .coroutine = qemu_coroutine_self(),
2830 BlockDriverAIOCB *acb;
2832 if (is_write) {
2833 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2834 bdrv_co_io_em_complete, &co);
2835 } else {
2836 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2837 bdrv_co_io_em_complete, &co);
2840 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
2841 if (!acb) {
2842 return -EIO;
2844 qemu_coroutine_yield();
2846 return co.ret;
2849 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2850 int64_t sector_num, int nb_sectors,
2851 QEMUIOVector *iov)
2853 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2856 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2857 int64_t sector_num, int nb_sectors,
2858 QEMUIOVector *iov)
2860 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2863 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2865 RwCo *rwco = opaque;
2867 rwco->ret = bdrv_co_flush(rwco->bs);
2870 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2872 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2873 return 0;
2874 } else if (!bs->drv) {
2875 return 0;
2876 } else if (bs->drv->bdrv_co_flush) {
2877 return bs->drv->bdrv_co_flush(bs);
2878 } else if (bs->drv->bdrv_aio_flush) {
2879 BlockDriverAIOCB *acb;
2880 CoroutineIOCompletion co = {
2881 .coroutine = qemu_coroutine_self(),
2884 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2885 if (acb == NULL) {
2886 return -EIO;
2887 } else {
2888 qemu_coroutine_yield();
2889 return co.ret;
2891 } else {
2893 * Some block drivers always operate in either writethrough or unsafe
2894 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
2895 * know how the server works (because the behaviour is hardcoded or
2896 * depends on server-side configuration), so we can't ensure that
2897 * everything is safe on disk. Returning an error doesn't work because
2898 * that would break guests even if the server operates in writethrough
2899 * mode.
2901 * Let's hope the user knows what he's doing.
2903 return 0;
2907 int bdrv_flush(BlockDriverState *bs)
2909 Coroutine *co;
2910 RwCo rwco = {
2911 .bs = bs,
2912 .ret = NOT_DONE,
2915 if (qemu_in_coroutine()) {
2916 /* Fast-path if already in coroutine context */
2917 bdrv_flush_co_entry(&rwco);
2918 } else {
2919 co = qemu_coroutine_create(bdrv_flush_co_entry);
2920 qemu_coroutine_enter(co, &rwco);
2921 while (rwco.ret == NOT_DONE) {
2922 qemu_aio_wait();
2926 return rwco.ret;
2929 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
2931 RwCo *rwco = opaque;
2933 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
2936 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
2937 int nb_sectors)
2939 if (!bs->drv) {
2940 return -ENOMEDIUM;
2941 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2942 return -EIO;
2943 } else if (bs->read_only) {
2944 return -EROFS;
2945 } else if (bs->drv->bdrv_co_discard) {
2946 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
2947 } else if (bs->drv->bdrv_aio_discard) {
2948 BlockDriverAIOCB *acb;
2949 CoroutineIOCompletion co = {
2950 .coroutine = qemu_coroutine_self(),
2953 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
2954 bdrv_co_io_em_complete, &co);
2955 if (acb == NULL) {
2956 return -EIO;
2957 } else {
2958 qemu_coroutine_yield();
2959 return co.ret;
2961 } else {
2962 return 0;
2966 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
2968 Coroutine *co;
2969 RwCo rwco = {
2970 .bs = bs,
2971 .sector_num = sector_num,
2972 .nb_sectors = nb_sectors,
2973 .ret = NOT_DONE,
2976 if (qemu_in_coroutine()) {
2977 /* Fast-path if already in coroutine context */
2978 bdrv_discard_co_entry(&rwco);
2979 } else {
2980 co = qemu_coroutine_create(bdrv_discard_co_entry);
2981 qemu_coroutine_enter(co, &rwco);
2982 while (rwco.ret == NOT_DONE) {
2983 qemu_aio_wait();
2987 return rwco.ret;
2990 /**************************************************************/
2991 /* removable device support */
2994 * Return TRUE if the media is present
2996 int bdrv_is_inserted(BlockDriverState *bs)
2998 BlockDriver *drv = bs->drv;
3000 if (!drv)
3001 return 0;
3002 if (!drv->bdrv_is_inserted)
3003 return 1;
3004 return drv->bdrv_is_inserted(bs);
3008 * Return whether the media changed since the last call to this
3009 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3011 int bdrv_media_changed(BlockDriverState *bs)
3013 BlockDriver *drv = bs->drv;
3015 if (drv && drv->bdrv_media_changed) {
3016 return drv->bdrv_media_changed(bs);
3018 return -ENOTSUP;
3022 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3024 void bdrv_eject(BlockDriverState *bs, int eject_flag)
3026 BlockDriver *drv = bs->drv;
3028 if (drv && drv->bdrv_eject) {
3029 drv->bdrv_eject(bs, eject_flag);
3034 * Lock or unlock the media (if it is locked, the user won't be able
3035 * to eject it manually).
3037 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3039 BlockDriver *drv = bs->drv;
3041 trace_bdrv_lock_medium(bs, locked);
3043 if (drv && drv->bdrv_lock_medium) {
3044 drv->bdrv_lock_medium(bs, locked);
3048 /* needed for generic scsi interface */
3050 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3052 BlockDriver *drv = bs->drv;
3054 if (drv && drv->bdrv_ioctl)
3055 return drv->bdrv_ioctl(bs, req, buf);
3056 return -ENOTSUP;
3059 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3060 unsigned long int req, void *buf,
3061 BlockDriverCompletionFunc *cb, void *opaque)
3063 BlockDriver *drv = bs->drv;
3065 if (drv && drv->bdrv_aio_ioctl)
3066 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3067 return NULL;
3070 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3072 bs->buffer_alignment = align;
3075 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3077 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3080 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3082 int64_t bitmap_size;
3084 bs->dirty_count = 0;
3085 if (enable) {
3086 if (!bs->dirty_bitmap) {
3087 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3088 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3089 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3091 bs->dirty_bitmap = g_malloc0(bitmap_size);
3093 } else {
3094 if (bs->dirty_bitmap) {
3095 g_free(bs->dirty_bitmap);
3096 bs->dirty_bitmap = NULL;
3101 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3103 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3105 if (bs->dirty_bitmap &&
3106 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3107 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3108 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3109 } else {
3110 return 0;
3114 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3115 int nr_sectors)
3117 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3120 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3122 return bs->dirty_count;
3125 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3127 assert(bs->in_use != in_use);
3128 bs->in_use = in_use;
3131 int bdrv_in_use(BlockDriverState *bs)
3133 return bs->in_use;
3136 void bdrv_iostatus_enable(BlockDriverState *bs)
3138 bs->iostatus = BDRV_IOS_OK;
3141 /* The I/O status is only enabled if the drive explicitly
3142 * enables it _and_ the VM is configured to stop on errors */
3143 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3145 return (bs->iostatus != BDRV_IOS_INVAL &&
3146 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3147 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3148 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3151 void bdrv_iostatus_disable(BlockDriverState *bs)
3153 bs->iostatus = BDRV_IOS_INVAL;
3156 void bdrv_iostatus_reset(BlockDriverState *bs)
3158 if (bdrv_iostatus_is_enabled(bs)) {
3159 bs->iostatus = BDRV_IOS_OK;
3163 /* XXX: Today this is set by device models because it makes the implementation
3164 quite simple. However, the block layer knows about the error, so it's
3165 possible to implement this without device models being involved */
3166 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3168 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3169 assert(error >= 0);
3170 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3174 void
3175 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3176 enum BlockAcctType type)
3178 assert(type < BDRV_MAX_IOTYPE);
3180 cookie->bytes = bytes;
3181 cookie->start_time_ns = get_clock();
3182 cookie->type = type;
3185 void
3186 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3188 assert(cookie->type < BDRV_MAX_IOTYPE);
3190 bs->nr_bytes[cookie->type] += cookie->bytes;
3191 bs->nr_ops[cookie->type]++;
3192 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3195 int bdrv_img_create(const char *filename, const char *fmt,
3196 const char *base_filename, const char *base_fmt,
3197 char *options, uint64_t img_size, int flags)
3199 QEMUOptionParameter *param = NULL, *create_options = NULL;
3200 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3201 BlockDriverState *bs = NULL;
3202 BlockDriver *drv, *proto_drv;
3203 BlockDriver *backing_drv = NULL;
3204 int ret = 0;
3206 /* Find driver and parse its options */
3207 drv = bdrv_find_format(fmt);
3208 if (!drv) {
3209 error_report("Unknown file format '%s'", fmt);
3210 ret = -EINVAL;
3211 goto out;
3214 proto_drv = bdrv_find_protocol(filename);
3215 if (!proto_drv) {
3216 error_report("Unknown protocol '%s'", filename);
3217 ret = -EINVAL;
3218 goto out;
3221 create_options = append_option_parameters(create_options,
3222 drv->create_options);
3223 create_options = append_option_parameters(create_options,
3224 proto_drv->create_options);
3226 /* Create parameter list with default values */
3227 param = parse_option_parameters("", create_options, param);
3229 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3231 /* Parse -o options */
3232 if (options) {
3233 param = parse_option_parameters(options, create_options, param);
3234 if (param == NULL) {
3235 error_report("Invalid options for file format '%s'.", fmt);
3236 ret = -EINVAL;
3237 goto out;
3241 if (base_filename) {
3242 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3243 base_filename)) {
3244 error_report("Backing file not supported for file format '%s'",
3245 fmt);
3246 ret = -EINVAL;
3247 goto out;
3251 if (base_fmt) {
3252 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3253 error_report("Backing file format not supported for file "
3254 "format '%s'", fmt);
3255 ret = -EINVAL;
3256 goto out;
3260 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3261 if (backing_file && backing_file->value.s) {
3262 if (!strcmp(filename, backing_file->value.s)) {
3263 error_report("Error: Trying to create an image with the "
3264 "same filename as the backing file");
3265 ret = -EINVAL;
3266 goto out;
3270 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3271 if (backing_fmt && backing_fmt->value.s) {
3272 backing_drv = bdrv_find_format(backing_fmt->value.s);
3273 if (!backing_drv) {
3274 error_report("Unknown backing file format '%s'",
3275 backing_fmt->value.s);
3276 ret = -EINVAL;
3277 goto out;
3281 // The size for the image must always be specified, with one exception:
3282 // If we are using a backing file, we can obtain the size from there
3283 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3284 if (size && size->value.n == -1) {
3285 if (backing_file && backing_file->value.s) {
3286 uint64_t size;
3287 char buf[32];
3289 bs = bdrv_new("");
3291 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3292 if (ret < 0) {
3293 error_report("Could not open '%s'", backing_file->value.s);
3294 goto out;
3296 bdrv_get_geometry(bs, &size);
3297 size *= 512;
3299 snprintf(buf, sizeof(buf), "%" PRId64, size);
3300 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3301 } else {
3302 error_report("Image creation needs a size parameter");
3303 ret = -EINVAL;
3304 goto out;
3308 printf("Formatting '%s', fmt=%s ", filename, fmt);
3309 print_option_parameters(param);
3310 puts("");
3312 ret = bdrv_create(drv, filename, param);
3314 if (ret < 0) {
3315 if (ret == -ENOTSUP) {
3316 error_report("Formatting or formatting option not supported for "
3317 "file format '%s'", fmt);
3318 } else if (ret == -EFBIG) {
3319 error_report("The image size is too large for file format '%s'",
3320 fmt);
3321 } else {
3322 error_report("%s: error while creating %s: %s", filename, fmt,
3323 strerror(-ret));
3327 out:
3328 free_option_parameters(create_options);
3329 free_option_parameters(param);
3331 if (bs) {
3332 bdrv_delete(bs);
3335 return ret;