block: directly invoke .bdrv_* from emulation functions
[qemu.git] / block.c
blob7c01f72f99c02fe9f5e650b361d21068748fa65f
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
47 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
48 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
49 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
50 BlockDriverCompletionFunc *cb, void *opaque);
51 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53 BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
55 BlockDriverCompletionFunc *cb, void *opaque);
56 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
57 BlockDriverCompletionFunc *cb, void *opaque);
58 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
59 uint8_t *buf, int nb_sectors);
60 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
61 const uint8_t *buf, int nb_sectors);
62 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
63 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
64 BlockDriverCompletionFunc *cb, void *opaque);
65 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
66 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
67 BlockDriverCompletionFunc *cb, void *opaque);
68 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
69 int64_t sector_num, int nb_sectors,
70 QEMUIOVector *iov);
71 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
72 int64_t sector_num, int nb_sectors,
73 QEMUIOVector *iov);
74 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
82 /* The device to use for VM snapshots */
83 static BlockDriverState *bs_snapshots;
85 /* If non-zero, use only whitelisted block drivers */
86 static int use_bdrv_whitelist;
88 #ifdef _WIN32
/* Return non-zero if @filename begins with a DOS drive specifier
 * such as "c:" or "Z:". Only reads filename[1] after confirming
 * filename[0] is an ASCII letter (so "" stays in bounds). */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];

    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
        return filename[1] == ':';
    }
    return 0;
}
96 int is_windows_drive(const char *filename)
98 if (is_windows_drive_prefix(filename) &&
99 filename[2] == '\0')
100 return 1;
101 if (strstart(filename, "\\\\.\\", NULL) ||
102 strstart(filename, "//./", NULL))
103 return 1;
104 return 0;
106 #endif
108 /* check if the path starts with "<protocol>:" */
109 static int path_has_protocol(const char *path)
111 #ifdef _WIN32
112 if (is_windows_drive(path) ||
113 is_windows_drive_prefix(path)) {
114 return 0;
116 #endif
118 return strchr(path, ':') != NULL;
/* Return 1 if @path is absolute, after skipping an optional
 * "<protocol>:" prefix. On Windows, backslashes and "\\.\d:"-style
 * names are also treated as absolute. */
int path_is_absolute(const char *path)
{
    const char *p;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    /* the path component begins right after "protocol:", if present */
    p = strchr(path, ':');
    p = p ? p + 1 : path;

#ifdef _WIN32
    return *p == '/' || *p == '\\';
#else
    return *p == '/';
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* directory part of base_path starts after "protocol:", if any */
    p = strchr(base_path, ':');
    p = p ? p + 1 : base_path;

    /* find the last path separator in base_path */
    p1 = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *p2 = strrchr(base_path, '\\');
        if (!p1 || p2 > p1) {
            p1 = p2;
        }
    }
#endif
    p1 = p1 ? p1 + 1 : base_path;
    if (p1 > p) {
        p = p1;
    }
    len = p - base_path;
    if (len > dest_size - 1) {
        len = dest_size - 1;
    }
    memcpy(dest, base_path, len);
    dest[len] = '\0';
    pstrcat(dest, dest_size, filename);
}
185 void bdrv_register(BlockDriver *bdrv)
187 if (bdrv->bdrv_co_readv) {
188 /* Emulate AIO by coroutines, and sync by AIO */
189 bdrv->bdrv_aio_readv = bdrv_co_aio_readv_em;
190 bdrv->bdrv_aio_writev = bdrv_co_aio_writev_em;
191 bdrv->bdrv_read = bdrv_read_em;
192 bdrv->bdrv_write = bdrv_write_em;
193 } else {
194 bdrv->bdrv_co_readv = bdrv_co_readv_em;
195 bdrv->bdrv_co_writev = bdrv_co_writev_em;
197 if (!bdrv->bdrv_aio_readv) {
198 /* add AIO emulation layer */
199 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
200 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
201 } else if (!bdrv->bdrv_read) {
202 /* add synchronous IO emulation layer */
203 bdrv->bdrv_read = bdrv_read_em;
204 bdrv->bdrv_write = bdrv_write_em;
208 if (!bdrv->bdrv_aio_flush)
209 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
211 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
214 /* create a new block device (by default it is empty) */
215 BlockDriverState *bdrv_new(const char *device_name)
217 BlockDriverState *bs;
219 bs = g_malloc0(sizeof(BlockDriverState));
220 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
221 if (device_name[0] != '\0') {
222 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
224 bdrv_iostatus_disable(bs);
225 return bs;
228 BlockDriver *bdrv_find_format(const char *format_name)
230 BlockDriver *drv1;
231 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
232 if (!strcmp(drv1->format_name, format_name)) {
233 return drv1;
236 return NULL;
239 static int bdrv_is_whitelisted(BlockDriver *drv)
241 static const char *whitelist[] = {
242 CONFIG_BDRV_WHITELIST
244 const char **p;
246 if (!whitelist[0])
247 return 1; /* no whitelist, anything goes */
249 for (p = whitelist; *p; p++) {
250 if (!strcmp(drv->format_name, *p)) {
251 return 1;
254 return 0;
257 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
259 BlockDriver *drv = bdrv_find_format(format_name);
260 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
263 int bdrv_create(BlockDriver *drv, const char* filename,
264 QEMUOptionParameter *options)
266 if (!drv->bdrv_create)
267 return -ENOTSUP;
269 return drv->bdrv_create(filename, options);
272 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
274 BlockDriver *drv;
276 drv = bdrv_find_protocol(filename);
277 if (drv == NULL) {
278 return -ENOENT;
281 return bdrv_create(drv, filename, options);
284 #ifdef _WIN32
285 void get_tmp_filename(char *filename, int size)
287 char temp_dir[MAX_PATH];
289 GetTempPath(MAX_PATH, temp_dir);
290 GetTempFileName(temp_dir, "qem", 0, filename);
292 #else
/* Fill @filename (of @size bytes) with the name of a newly created
 * temporary file under $TMPDIR (default /tmp).
 *
 * Fix: the original unconditionally called close(fd) after mkstemp();
 * on mkstemp() failure that passed -1 to close() (EBADF). Guard it.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    if (fd >= 0) {
        close(fd);
    }
}
305 #endif
308 * Detect host devices. By convention, /dev/cdrom[N] is always
309 * recognized as a host CDROM.
311 static BlockDriver *find_hdev_driver(const char *filename)
313 int score_max = 0, score;
314 BlockDriver *drv = NULL, *d;
316 QLIST_FOREACH(d, &bdrv_drivers, list) {
317 if (d->bdrv_probe_device) {
318 score = d->bdrv_probe_device(filename);
319 if (score > score_max) {
320 score_max = score;
321 drv = d;
326 return drv;
329 BlockDriver *bdrv_find_protocol(const char *filename)
331 BlockDriver *drv1;
332 char protocol[128];
333 int len;
334 const char *p;
336 /* TODO Drivers without bdrv_file_open must be specified explicitly */
339 * XXX(hch): we really should not let host device detection
340 * override an explicit protocol specification, but moving this
341 * later breaks access to device names with colons in them.
342 * Thanks to the brain-dead persistent naming schemes on udev-
343 * based Linux systems those actually are quite common.
345 drv1 = find_hdev_driver(filename);
346 if (drv1) {
347 return drv1;
350 if (!path_has_protocol(filename)) {
351 return bdrv_find_format("file");
353 p = strchr(filename, ':');
354 assert(p != NULL);
355 len = p - filename;
356 if (len > sizeof(protocol) - 1)
357 len = sizeof(protocol) - 1;
358 memcpy(protocol, filename, len);
359 protocol[len] = '\0';
360 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
361 if (drv1->protocol_name &&
362 !strcmp(drv1->protocol_name, protocol)) {
363 return drv1;
366 return NULL;
369 static int find_image_format(const char *filename, BlockDriver **pdrv)
371 int ret, score, score_max;
372 BlockDriver *drv1, *drv;
373 uint8_t buf[2048];
374 BlockDriverState *bs;
376 ret = bdrv_file_open(&bs, filename, 0);
377 if (ret < 0) {
378 *pdrv = NULL;
379 return ret;
382 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
383 if (bs->sg || !bdrv_is_inserted(bs)) {
384 bdrv_delete(bs);
385 drv = bdrv_find_format("raw");
386 if (!drv) {
387 ret = -ENOENT;
389 *pdrv = drv;
390 return ret;
393 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
394 bdrv_delete(bs);
395 if (ret < 0) {
396 *pdrv = NULL;
397 return ret;
400 score_max = 0;
401 drv = NULL;
402 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
403 if (drv1->bdrv_probe) {
404 score = drv1->bdrv_probe(buf, ret, filename);
405 if (score > score_max) {
406 score_max = score;
407 drv = drv1;
411 if (!drv) {
412 ret = -ENOENT;
414 *pdrv = drv;
415 return ret;
419 * Set the current 'total_sectors' value
421 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
423 BlockDriver *drv = bs->drv;
425 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
426 if (bs->sg)
427 return 0;
429 /* query actual device if possible, otherwise just trust the hint */
430 if (drv->bdrv_getlength) {
431 int64_t length = drv->bdrv_getlength(bs);
432 if (length < 0) {
433 return length;
435 hint = length >> BDRV_SECTOR_BITS;
438 bs->total_sectors = hint;
439 return 0;
443 * Set open flags for a given cache mode
445 * Return 0 on success, -1 if the cache mode was invalid.
447 int bdrv_parse_cache_flags(const char *mode, int *flags)
449 *flags &= ~BDRV_O_CACHE_MASK;
451 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
452 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
453 } else if (!strcmp(mode, "directsync")) {
454 *flags |= BDRV_O_NOCACHE;
455 } else if (!strcmp(mode, "writeback")) {
456 *flags |= BDRV_O_CACHE_WB;
457 } else if (!strcmp(mode, "unsafe")) {
458 *flags |= BDRV_O_CACHE_WB;
459 *flags |= BDRV_O_NO_FLUSH;
460 } else if (!strcmp(mode, "writethrough")) {
461 /* this is the default */
462 } else {
463 return -1;
466 return 0;
470 * Common part for opening disk images and files
472 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
473 int flags, BlockDriver *drv)
475 int ret, open_flags;
477 assert(drv != NULL);
479 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
481 bs->file = NULL;
482 bs->total_sectors = 0;
483 bs->encrypted = 0;
484 bs->valid_key = 0;
485 bs->open_flags = flags;
486 bs->buffer_alignment = 512;
488 pstrcpy(bs->filename, sizeof(bs->filename), filename);
490 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
491 return -ENOTSUP;
494 bs->drv = drv;
495 bs->opaque = g_malloc0(drv->instance_size);
497 if (flags & BDRV_O_CACHE_WB)
498 bs->enable_write_cache = 1;
501 * Clear flags that are internal to the block layer before opening the
502 * image.
504 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
507 * Snapshots should be writable.
509 if (bs->is_temporary) {
510 open_flags |= BDRV_O_RDWR;
513 /* Open the image, either directly or using a protocol */
514 if (drv->bdrv_file_open) {
515 ret = drv->bdrv_file_open(bs, filename, open_flags);
516 } else {
517 ret = bdrv_file_open(&bs->file, filename, open_flags);
518 if (ret >= 0) {
519 ret = drv->bdrv_open(bs, open_flags);
523 if (ret < 0) {
524 goto free_and_fail;
527 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
529 ret = refresh_total_sectors(bs, bs->total_sectors);
530 if (ret < 0) {
531 goto free_and_fail;
534 #ifndef _WIN32
535 if (bs->is_temporary) {
536 unlink(filename);
538 #endif
539 return 0;
541 free_and_fail:
542 if (bs->file) {
543 bdrv_delete(bs->file);
544 bs->file = NULL;
546 g_free(bs->opaque);
547 bs->opaque = NULL;
548 bs->drv = NULL;
549 return ret;
553 * Opens a file using a protocol (file, host_device, nbd, ...)
555 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
557 BlockDriverState *bs;
558 BlockDriver *drv;
559 int ret;
561 drv = bdrv_find_protocol(filename);
562 if (!drv) {
563 return -ENOENT;
566 bs = bdrv_new("");
567 ret = bdrv_open_common(bs, filename, flags, drv);
568 if (ret < 0) {
569 bdrv_delete(bs);
570 return ret;
572 bs->growable = 1;
573 *pbs = bs;
574 return 0;
578 * Opens a disk image (raw, qcow2, vmdk, ...)
580 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
581 BlockDriver *drv)
583 int ret;
585 if (flags & BDRV_O_SNAPSHOT) {
586 BlockDriverState *bs1;
587 int64_t total_size;
588 int is_protocol = 0;
589 BlockDriver *bdrv_qcow2;
590 QEMUOptionParameter *options;
591 char tmp_filename[PATH_MAX];
592 char backing_filename[PATH_MAX];
594 /* if snapshot, we create a temporary backing file and open it
595 instead of opening 'filename' directly */
597 /* if there is a backing file, use it */
598 bs1 = bdrv_new("");
599 ret = bdrv_open(bs1, filename, 0, drv);
600 if (ret < 0) {
601 bdrv_delete(bs1);
602 return ret;
604 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
606 if (bs1->drv && bs1->drv->protocol_name)
607 is_protocol = 1;
609 bdrv_delete(bs1);
611 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
613 /* Real path is meaningless for protocols */
614 if (is_protocol)
615 snprintf(backing_filename, sizeof(backing_filename),
616 "%s", filename);
617 else if (!realpath(filename, backing_filename))
618 return -errno;
620 bdrv_qcow2 = bdrv_find_format("qcow2");
621 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
623 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
624 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
625 if (drv) {
626 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
627 drv->format_name);
630 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
631 free_option_parameters(options);
632 if (ret < 0) {
633 return ret;
636 filename = tmp_filename;
637 drv = bdrv_qcow2;
638 bs->is_temporary = 1;
641 /* Find the right image format driver */
642 if (!drv) {
643 ret = find_image_format(filename, &drv);
646 if (!drv) {
647 goto unlink_and_fail;
650 /* Open the image */
651 ret = bdrv_open_common(bs, filename, flags, drv);
652 if (ret < 0) {
653 goto unlink_and_fail;
656 /* If there is a backing file, use it */
657 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
658 char backing_filename[PATH_MAX];
659 int back_flags;
660 BlockDriver *back_drv = NULL;
662 bs->backing_hd = bdrv_new("");
664 if (path_has_protocol(bs->backing_file)) {
665 pstrcpy(backing_filename, sizeof(backing_filename),
666 bs->backing_file);
667 } else {
668 path_combine(backing_filename, sizeof(backing_filename),
669 filename, bs->backing_file);
672 if (bs->backing_format[0] != '\0') {
673 back_drv = bdrv_find_format(bs->backing_format);
676 /* backing files always opened read-only */
677 back_flags =
678 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
680 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
681 if (ret < 0) {
682 bdrv_close(bs);
683 return ret;
685 if (bs->is_temporary) {
686 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
687 } else {
688 /* base image inherits from "parent" */
689 bs->backing_hd->keep_read_only = bs->keep_read_only;
693 if (!bdrv_key_required(bs)) {
694 bdrv_dev_change_media_cb(bs, true);
697 return 0;
699 unlink_and_fail:
700 if (bs->is_temporary) {
701 unlink(filename);
703 return ret;
706 void bdrv_close(BlockDriverState *bs)
708 if (bs->drv) {
709 if (bs == bs_snapshots) {
710 bs_snapshots = NULL;
712 if (bs->backing_hd) {
713 bdrv_delete(bs->backing_hd);
714 bs->backing_hd = NULL;
716 bs->drv->bdrv_close(bs);
717 g_free(bs->opaque);
718 #ifdef _WIN32
719 if (bs->is_temporary) {
720 unlink(bs->filename);
722 #endif
723 bs->opaque = NULL;
724 bs->drv = NULL;
726 if (bs->file != NULL) {
727 bdrv_close(bs->file);
730 bdrv_dev_change_media_cb(bs, false);
734 void bdrv_close_all(void)
736 BlockDriverState *bs;
738 QTAILQ_FOREACH(bs, &bdrv_states, list) {
739 bdrv_close(bs);
743 /* make a BlockDriverState anonymous by removing from bdrv_state list.
744 Also, NULL terminate the device_name to prevent double remove */
745 void bdrv_make_anon(BlockDriverState *bs)
747 if (bs->device_name[0] != '\0') {
748 QTAILQ_REMOVE(&bdrv_states, bs, list);
750 bs->device_name[0] = '\0';
753 void bdrv_delete(BlockDriverState *bs)
755 assert(!bs->dev);
757 /* remove from list, if necessary */
758 bdrv_make_anon(bs);
760 bdrv_close(bs);
761 if (bs->file != NULL) {
762 bdrv_delete(bs->file);
765 assert(bs != bs_snapshots);
766 g_free(bs);
769 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
770 /* TODO change to DeviceState *dev when all users are qdevified */
772 if (bs->dev) {
773 return -EBUSY;
775 bs->dev = dev;
776 bdrv_iostatus_reset(bs);
777 return 0;
780 /* TODO qdevified devices don't use this, remove when devices are qdevified */
781 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
783 if (bdrv_attach_dev(bs, dev) < 0) {
784 abort();
788 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
789 /* TODO change to DeviceState *dev when all users are qdevified */
791 assert(bs->dev == dev);
792 bs->dev = NULL;
793 bs->dev_ops = NULL;
794 bs->dev_opaque = NULL;
795 bs->buffer_alignment = 512;
798 /* TODO change to return DeviceState * when all users are qdevified */
799 void *bdrv_get_attached_dev(BlockDriverState *bs)
801 return bs->dev;
804 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
805 void *opaque)
807 bs->dev_ops = ops;
808 bs->dev_opaque = opaque;
809 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
810 bs_snapshots = NULL;
814 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
816 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
817 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
821 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
823 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
826 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
828 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
829 return bs->dev_ops->is_tray_open(bs->dev_opaque);
831 return false;
834 static void bdrv_dev_resize_cb(BlockDriverState *bs)
836 if (bs->dev_ops && bs->dev_ops->resize_cb) {
837 bs->dev_ops->resize_cb(bs->dev_opaque);
841 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
843 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
844 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
846 return false;
850 * Run consistency checks on an image
852 * Returns 0 if the check could be completed (it doesn't mean that the image is
853 * free of errors) or -errno when an internal error occurred. The results of the
854 * check are stored in res.
856 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
858 if (bs->drv->bdrv_check == NULL) {
859 return -ENOTSUP;
862 memset(res, 0, sizeof(*res));
863 return bs->drv->bdrv_check(bs, res);
866 #define COMMIT_BUF_SECTORS 2048
868 /* commit COW file into the raw image */
869 int bdrv_commit(BlockDriverState *bs)
871 BlockDriver *drv = bs->drv;
872 BlockDriver *backing_drv;
873 int64_t sector, total_sectors;
874 int n, ro, open_flags;
875 int ret = 0, rw_ret = 0;
876 uint8_t *buf;
877 char filename[1024];
878 BlockDriverState *bs_rw, *bs_ro;
880 if (!drv)
881 return -ENOMEDIUM;
883 if (!bs->backing_hd) {
884 return -ENOTSUP;
887 if (bs->backing_hd->keep_read_only) {
888 return -EACCES;
891 backing_drv = bs->backing_hd->drv;
892 ro = bs->backing_hd->read_only;
893 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
894 open_flags = bs->backing_hd->open_flags;
896 if (ro) {
897 /* re-open as RW */
898 bdrv_delete(bs->backing_hd);
899 bs->backing_hd = NULL;
900 bs_rw = bdrv_new("");
901 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
902 backing_drv);
903 if (rw_ret < 0) {
904 bdrv_delete(bs_rw);
905 /* try to re-open read-only */
906 bs_ro = bdrv_new("");
907 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
908 backing_drv);
909 if (ret < 0) {
910 bdrv_delete(bs_ro);
911 /* drive not functional anymore */
912 bs->drv = NULL;
913 return ret;
915 bs->backing_hd = bs_ro;
916 return rw_ret;
918 bs->backing_hd = bs_rw;
921 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
922 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
924 for (sector = 0; sector < total_sectors; sector += n) {
925 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
927 if (bdrv_read(bs, sector, buf, n) != 0) {
928 ret = -EIO;
929 goto ro_cleanup;
932 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
933 ret = -EIO;
934 goto ro_cleanup;
939 if (drv->bdrv_make_empty) {
940 ret = drv->bdrv_make_empty(bs);
941 bdrv_flush(bs);
945 * Make sure all data we wrote to the backing device is actually
946 * stable on disk.
948 if (bs->backing_hd)
949 bdrv_flush(bs->backing_hd);
951 ro_cleanup:
952 g_free(buf);
954 if (ro) {
955 /* re-open as RO */
956 bdrv_delete(bs->backing_hd);
957 bs->backing_hd = NULL;
958 bs_ro = bdrv_new("");
959 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
960 backing_drv);
961 if (ret < 0) {
962 bdrv_delete(bs_ro);
963 /* drive not functional anymore */
964 bs->drv = NULL;
965 return ret;
967 bs->backing_hd = bs_ro;
968 bs->backing_hd->keep_read_only = 0;
971 return ret;
974 void bdrv_commit_all(void)
976 BlockDriverState *bs;
978 QTAILQ_FOREACH(bs, &bdrv_states, list) {
979 bdrv_commit(bs);
984 * Return values:
985 * 0 - success
986 * -EINVAL - backing format specified, but no file
987 * -ENOSPC - can't update the backing file because no space is left in the
988 * image file header
989 * -ENOTSUP - format driver doesn't support changing the backing file
991 int bdrv_change_backing_file(BlockDriverState *bs,
992 const char *backing_file, const char *backing_fmt)
994 BlockDriver *drv = bs->drv;
996 if (drv->bdrv_change_backing_file != NULL) {
997 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
998 } else {
999 return -ENOTSUP;
1003 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
1004 size_t size)
1006 int64_t len;
1008 if (!bdrv_is_inserted(bs))
1009 return -ENOMEDIUM;
1011 if (bs->growable)
1012 return 0;
1014 len = bdrv_getlength(bs);
1016 if (offset < 0)
1017 return -EIO;
1019 if ((offset > len) || (len - offset < size))
1020 return -EIO;
1022 return 0;
1025 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1026 int nb_sectors)
1028 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1029 nb_sectors * BDRV_SECTOR_SIZE);
1032 static inline bool bdrv_has_async_rw(BlockDriver *drv)
1034 return drv->bdrv_co_readv != bdrv_co_readv_em
1035 || drv->bdrv_aio_readv != bdrv_aio_readv_em;
1038 static inline bool bdrv_has_async_flush(BlockDriver *drv)
1040 return drv->bdrv_aio_flush != bdrv_aio_flush_em;
1043 /* return < 0 if error. See bdrv_write() for the return codes */
1044 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1045 uint8_t *buf, int nb_sectors)
1047 BlockDriver *drv = bs->drv;
1049 if (!drv)
1050 return -ENOMEDIUM;
1052 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1053 QEMUIOVector qiov;
1054 struct iovec iov = {
1055 .iov_base = (void *)buf,
1056 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1059 qemu_iovec_init_external(&qiov, &iov, 1);
1060 return bdrv_co_readv(bs, sector_num, nb_sectors, &qiov);
1063 if (bdrv_check_request(bs, sector_num, nb_sectors))
1064 return -EIO;
1066 return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
1069 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1070 int nb_sectors, int dirty)
1072 int64_t start, end;
1073 unsigned long val, idx, bit;
1075 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1076 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1078 for (; start <= end; start++) {
1079 idx = start / (sizeof(unsigned long) * 8);
1080 bit = start % (sizeof(unsigned long) * 8);
1081 val = bs->dirty_bitmap[idx];
1082 if (dirty) {
1083 if (!(val & (1UL << bit))) {
1084 bs->dirty_count++;
1085 val |= 1UL << bit;
1087 } else {
1088 if (val & (1UL << bit)) {
1089 bs->dirty_count--;
1090 val &= ~(1UL << bit);
1093 bs->dirty_bitmap[idx] = val;
1097 /* Return < 0 if error. Important errors are:
1098 -EIO generic I/O error (may happen for all errors)
1099 -ENOMEDIUM No media inserted.
1100 -EINVAL Invalid sector number or nb_sectors
1101 -EACCES Trying to write a read-only device
1103 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1104 const uint8_t *buf, int nb_sectors)
1106 BlockDriver *drv = bs->drv;
1108 if (!bs->drv)
1109 return -ENOMEDIUM;
1111 if (bdrv_has_async_rw(drv) && qemu_in_coroutine()) {
1112 QEMUIOVector qiov;
1113 struct iovec iov = {
1114 .iov_base = (void *)buf,
1115 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1118 qemu_iovec_init_external(&qiov, &iov, 1);
1119 return bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
1122 if (bs->read_only)
1123 return -EACCES;
1124 if (bdrv_check_request(bs, sector_num, nb_sectors))
1125 return -EIO;
1127 if (bs->dirty_bitmap) {
1128 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1131 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1132 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1135 return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
1138 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1139 void *buf, int count1)
1141 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1142 int len, nb_sectors, count;
1143 int64_t sector_num;
1144 int ret;
1146 count = count1;
1147 /* first read to align to sector start */
1148 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1149 if (len > count)
1150 len = count;
1151 sector_num = offset >> BDRV_SECTOR_BITS;
1152 if (len > 0) {
1153 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1154 return ret;
1155 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1156 count -= len;
1157 if (count == 0)
1158 return count1;
1159 sector_num++;
1160 buf += len;
1163 /* read the sectors "in place" */
1164 nb_sectors = count >> BDRV_SECTOR_BITS;
1165 if (nb_sectors > 0) {
1166 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1167 return ret;
1168 sector_num += nb_sectors;
1169 len = nb_sectors << BDRV_SECTOR_BITS;
1170 buf += len;
1171 count -= len;
1174 /* add data from the last sector */
1175 if (count > 0) {
1176 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1177 return ret;
1178 memcpy(buf, tmp_buf, count);
1180 return count1;
1183 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1184 const void *buf, int count1)
1186 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1187 int len, nb_sectors, count;
1188 int64_t sector_num;
1189 int ret;
1191 count = count1;
1192 /* first write to align to sector start */
1193 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1194 if (len > count)
1195 len = count;
1196 sector_num = offset >> BDRV_SECTOR_BITS;
1197 if (len > 0) {
1198 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1199 return ret;
1200 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1201 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1202 return ret;
1203 count -= len;
1204 if (count == 0)
1205 return count1;
1206 sector_num++;
1207 buf += len;
1210 /* write the sectors "in place" */
1211 nb_sectors = count >> BDRV_SECTOR_BITS;
1212 if (nb_sectors > 0) {
1213 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1214 return ret;
1215 sector_num += nb_sectors;
1216 len = nb_sectors << BDRV_SECTOR_BITS;
1217 buf += len;
1218 count -= len;
1221 /* add data from the last sector */
1222 if (count > 0) {
1223 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1224 return ret;
1225 memcpy(tmp_buf, buf, count);
1226 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1227 return ret;
1229 return count1;
1233 * Writes to the file and ensures that no writes are reordered across this
1234 * request (acts as a barrier)
1236 * Returns 0 on success, -errno in error cases.
1238 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1239 const void *buf, int count)
1241 int ret;
1243 ret = bdrv_pwrite(bs, offset, buf, count);
1244 if (ret < 0) {
1245 return ret;
1248 /* No flush needed for cache modes that use O_DSYNC */
1249 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1250 bdrv_flush(bs);
1253 return 0;
1256 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1257 int nb_sectors, QEMUIOVector *qiov)
1259 BlockDriver *drv = bs->drv;
1261 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1263 if (!drv) {
1264 return -ENOMEDIUM;
1266 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1267 return -EIO;
1270 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1273 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1274 int nb_sectors, QEMUIOVector *qiov)
1276 BlockDriver *drv = bs->drv;
1278 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1280 if (!bs->drv) {
1281 return -ENOMEDIUM;
1283 if (bs->read_only) {
1284 return -EACCES;
1286 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1287 return -EIO;
1290 if (bs->dirty_bitmap) {
1291 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1294 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1295 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1298 return drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1302 * Truncate file to 'offset' bytes (needed only for file protocols)
1304 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1306 BlockDriver *drv = bs->drv;
1307 int ret;
1308 if (!drv)
1309 return -ENOMEDIUM;
1310 if (!drv->bdrv_truncate)
1311 return -ENOTSUP;
1312 if (bs->read_only)
1313 return -EACCES;
1314 if (bdrv_in_use(bs))
1315 return -EBUSY;
1316 ret = drv->bdrv_truncate(bs, offset);
1317 if (ret == 0) {
1318 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1319 bdrv_dev_resize_cb(bs);
1321 return ret;
1325 * Length of a allocated file in bytes. Sparse files are counted by actual
1326 * allocated space. Return < 0 if error or unknown.
1328 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1330 BlockDriver *drv = bs->drv;
1331 if (!drv) {
1332 return -ENOMEDIUM;
1334 if (drv->bdrv_get_allocated_file_size) {
1335 return drv->bdrv_get_allocated_file_size(bs);
1337 if (bs->file) {
1338 return bdrv_get_allocated_file_size(bs->file);
1340 return -ENOTSUP;
1344 * Length of a file in bytes. Return < 0 if error or unknown.
1346 int64_t bdrv_getlength(BlockDriverState *bs)
1348 BlockDriver *drv = bs->drv;
1349 if (!drv)
1350 return -ENOMEDIUM;
1352 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1353 if (drv->bdrv_getlength) {
1354 return drv->bdrv_getlength(bs);
1357 return bs->total_sectors * BDRV_SECTOR_SIZE;
1360 /* return 0 as number of sectors if no device present or error */
1361 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1363 int64_t length;
1364 length = bdrv_getlength(bs);
1365 if (length < 0)
1366 length = 0;
1367 else
1368 length = length >> BDRV_SECTOR_BITS;
1369 *nb_sectors_ptr = length;
/* Legacy MBR partition-table entry as stored on disk (16 bytes, packed,
 * little-endian fields); four of these live at offset 0x1be of sector 0. */
struct partition {
        uint8_t boot_ind;           /* 0x80 - active */
        uint8_t head;               /* starting head */
        uint8_t sector;             /* starting sector */
        uint8_t cyl;                /* starting cylinder */
        uint8_t sys_ind;            /* What partition type */
        uint8_t end_head;           /* end head */
        uint8_t end_sector;         /* end sector */
        uint8_t end_cyl;            /* end cylinder */
        uint32_t start_sect;        /* starting sector counting from 0 */
        uint32_t nr_sects;          /* nr of sectors in partition */
} QEMU_PACKED;
/* try to guess the disk logical geometry from the MSDOS partition table.
   Return 0 if OK, -1 if could not guess */
static int guess_disk_lchs(BlockDriverState *bs,
                           int *pcylinders, int *pheads, int *psectors)
{
    uint8_t buf[BDRV_SECTOR_SIZE];
    int ret, i, heads, sectors, cylinders;
    struct partition *p;
    uint32_t nr_sects;
    uint64_t nb_sectors;

    bdrv_get_geometry(bs, &nb_sectors);

    /* Read the MBR (sector 0). */
    ret = bdrv_read(bs, 0, buf, 1);
    if (ret < 0)
        return -1;
    /* test msdos magic */
    if (buf[510] != 0x55 || buf[511] != 0xaa)
        return -1;
    /* Scan the four primary partition entries at offset 0x1be. */
    for(i = 0; i < 4; i++) {
        p = ((struct partition *)(buf + 0x1be)) + i;
        nr_sects = le32_to_cpu(p->nr_sects);
        if (nr_sects && p->end_head) {
            /* We make the assumption that the partition terminates on
               a cylinder boundary */
            heads = p->end_head + 1;
            /* low 6 bits of end_sector hold the sector number (1-based);
               the high 2 bits belong to the cylinder field */
            sectors = p->end_sector & 63;
            if (sectors == 0)
                continue;
            cylinders = nb_sectors / (heads * sectors);
            /* 16383 is the conventional CHS cylinder limit */
            if (cylinders < 1 || cylinders > 16383)
                continue;
            *pheads = heads;
            *psectors = sectors;
            *pcylinders = cylinders;
#if 0
            printf("guessed geometry: LCHS=%d %d %d\n",
                   cylinders, heads, sectors);
#endif
            return 0;
        }
    }
    return -1;
}
/*
 * Pick a CHS geometry for the drive: use an explicit user hint if set,
 * otherwise the geometry guessed from the partition table, otherwise a
 * standard 16-head/63-sector layout.  May also update the BIOS LBA
 * translation hint when it is still BIOS_ATA_TRANSLATION_AUTO.
 */
void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
{
    int translation, lba_detected = 0;
    int cylinders, heads, secs;
    uint64_t nb_sectors;

    /* if a geometry hint is available, use it */
    bdrv_get_geometry(bs, &nb_sectors);
    bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
    translation = bdrv_get_translation_hint(bs);
    if (cylinders != 0) {
        *pcyls = cylinders;
        *pheads = heads;
        *psecs = secs;
    } else {
        if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
            if (heads > 16) {
                /* if heads > 16, it means that a BIOS LBA
                   translation was active, so the default
                   hardware geometry is OK */
                lba_detected = 1;
                goto default_geometry;
            } else {
                *pcyls = cylinders;
                *pheads = heads;
                *psecs = secs;
                /* disable any translation to be in sync with
                   the logical geometry */
                if (translation == BIOS_ATA_TRANSLATION_AUTO) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_NONE);
                }
            }
        } else {
        default_geometry:
            /* if no geometry, use a standard physical disk geometry */
            cylinders = nb_sectors / (16 * 63);

            /* clamp cylinders to the conventional ATA range [2, 16383] */
            if (cylinders > 16383)
                cylinders = 16383;
            else if (cylinders < 2)
                cylinders = 2;
            *pcyls = cylinders;
            *pheads = 16;
            *psecs = 63;
            /* pick LARGE vs LBA translation based on the 128k-cylinder
               boundary, but only when a BIOS translation was detected */
            if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
                if ((*pcyls * *pheads) <= 131072) {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LARGE);
                } else {
                    bdrv_set_translation_hint(bs,
                                              BIOS_ATA_TRANSLATION_LBA);
                }
            }
        }
        /* remember the chosen geometry so the next call takes the fast path */
        bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
    }
}
1488 void bdrv_set_geometry_hint(BlockDriverState *bs,
1489 int cyls, int heads, int secs)
1491 bs->cyls = cyls;
1492 bs->heads = heads;
1493 bs->secs = secs;
1496 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1498 bs->translation = translation;
1501 void bdrv_get_geometry_hint(BlockDriverState *bs,
1502 int *pcyls, int *pheads, int *psecs)
1504 *pcyls = bs->cyls;
1505 *pheads = bs->heads;
1506 *psecs = bs->secs;
/* Recognize floppy formats */
/* One known floppy layout: drive type plus sectors-per-track, track count,
 * and highest head index (0 = single-sided). */
typedef struct FDFormat {
    FDriveType drive;
    uint8_t last_sect;
    uint8_t max_track;
    uint8_t max_head;
} FDFormat;
/* Table of recognized floppy formats, matched against the image size in
 * bdrv_get_floppy_geometry_hint().  Terminated by the FDRIVE_DRV_NONE
 * sentinel (whose -1 fields wrap to 255 in the uint8_t members). */
static const FDFormat fd_formats[] = {
    /* First entry is default format */
    /* 1.44 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_144, 18, 80, 1, },
    { FDRIVE_DRV_144, 20, 80, 1, },
    { FDRIVE_DRV_144, 21, 80, 1, },
    { FDRIVE_DRV_144, 21, 82, 1, },
    { FDRIVE_DRV_144, 21, 83, 1, },
    { FDRIVE_DRV_144, 22, 80, 1, },
    { FDRIVE_DRV_144, 23, 80, 1, },
    { FDRIVE_DRV_144, 24, 80, 1, },
    /* 2.88 MB 3"1/2 floppy disks */
    { FDRIVE_DRV_288, 36, 80, 1, },
    { FDRIVE_DRV_288, 39, 80, 1, },
    { FDRIVE_DRV_288, 40, 80, 1, },
    { FDRIVE_DRV_288, 44, 80, 1, },
    { FDRIVE_DRV_288, 48, 80, 1, },
    /* 720 kB 3"1/2 floppy disks */
    { FDRIVE_DRV_144,  9, 80, 1, },
    { FDRIVE_DRV_144, 10, 80, 1, },
    { FDRIVE_DRV_144, 10, 82, 1, },
    { FDRIVE_DRV_144, 10, 83, 1, },
    { FDRIVE_DRV_144, 13, 80, 1, },
    { FDRIVE_DRV_144, 14, 80, 1, },
    /* 1.2 MB 5"1/4 floppy disks */
    { FDRIVE_DRV_120, 15, 80, 1, },
    { FDRIVE_DRV_120, 18, 80, 1, },
    { FDRIVE_DRV_120, 18, 82, 1, },
    { FDRIVE_DRV_120, 18, 83, 1, },
    { FDRIVE_DRV_120, 20, 80, 1, },
    /* 720 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 80, 1, },
    { FDRIVE_DRV_120, 11, 80, 1, },
    /* 360 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  9, 40, 1, },
    { FDRIVE_DRV_120,  9, 40, 0, },
    { FDRIVE_DRV_120, 10, 41, 1, },
    { FDRIVE_DRV_120, 10, 42, 1, },
    /* 320 kB 5"1/4 floppy disks */
    { FDRIVE_DRV_120,  8, 40, 1, },
    { FDRIVE_DRV_120,  8, 40, 0, },
    /* 360 kB must match 5"1/4 better than 3"1/2... */
    { FDRIVE_DRV_144,  9, 80, 0, },
    /* end */
    { FDRIVE_DRV_NONE, -1, -1, 0, },
};
/*
 * Determine floppy geometry: use the user-supplied hint when complete,
 * otherwise pick the fd_formats entry whose total sector count matches the
 * image size (restricted to drive_in's type unless FDRIVE_DRV_NONE).
 */
void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
                                   int *max_track, int *last_sect,
                                   FDriveType drive_in, FDriveType *drive)
{
    const FDFormat *parse;
    uint64_t nb_sectors, size;
    int i, first_match, match;

    bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
    if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
        /* User defined disk */
    } else {
        bdrv_get_geometry(bs, &nb_sectors);
        match = -1;
        first_match = -1;
        for (i = 0; ; i++) {
            parse = &fd_formats[i];
            if (parse->drive == FDRIVE_DRV_NONE) {
                break;
            }
            if (drive_in == parse->drive ||
                drive_in == FDRIVE_DRV_NONE) {
                size = (parse->max_head + 1) * parse->max_track *
                    parse->last_sect;
                if (nb_sectors == size) {
                    match = i;
                    break;
                }
                /* remember the first type-compatible entry as a fallback */
                if (first_match == -1) {
                    first_match = i;
                }
            }
        }
        if (match == -1) {
            if (first_match == -1) {
                /* NOTE(review): this picks fd_formats[1], although the table
                 * comment says the first entry (index 0) is the default —
                 * looks like an off-by-one, but changing it is guest-visible;
                 * confirm before fixing. */
                match = 1;
            } else {
                match = first_match;
            }
            parse = &fd_formats[match];
        }
        *nb_heads = parse->max_head + 1;
        *max_track = parse->max_track;
        *last_sect = parse->last_sect;
        *drive = parse->drive;
    }
}
1612 int bdrv_get_translation_hint(BlockDriverState *bs)
1614 return bs->translation;
1617 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1618 BlockErrorAction on_write_error)
1620 bs->on_read_error = on_read_error;
1621 bs->on_write_error = on_write_error;
1624 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1626 return is_read ? bs->on_read_error : bs->on_write_error;
1629 int bdrv_is_read_only(BlockDriverState *bs)
1631 return bs->read_only;
1634 int bdrv_is_sg(BlockDriverState *bs)
1636 return bs->sg;
1639 int bdrv_enable_write_cache(BlockDriverState *bs)
1641 return bs->enable_write_cache;
1644 int bdrv_is_encrypted(BlockDriverState *bs)
1646 if (bs->backing_hd && bs->backing_hd->encrypted)
1647 return 1;
1648 return bs->encrypted;
1651 int bdrv_key_required(BlockDriverState *bs)
1653 BlockDriverState *backing_hd = bs->backing_hd;
1655 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1656 return 1;
1657 return (bs->encrypted && !bs->valid_key);
/*
 * Supply the encryption key for bs (and, first, for its encrypted backing
 * file).  On the first successful key set the deferred media-change
 * callback is fired.  Returns 0 on success or a negative errno.
 */
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
    int ret;
    if (bs->backing_hd && bs->backing_hd->encrypted) {
        /* the backing file needs the key too; set it there first */
        ret = bdrv_set_key(bs->backing_hd, key);
        if (ret < 0)
            return ret;
        /* only the backing file was encrypted: done */
        if (!bs->encrypted)
            return 0;
    }
    if (!bs->encrypted) {
        return -EINVAL;
    } else if (!bs->drv || !bs->drv->bdrv_set_key) {
        return -ENOMEDIUM;
    }
    ret = bs->drv->bdrv_set_key(bs, key);
    if (ret < 0) {
        bs->valid_key = 0;
    } else if (!bs->valid_key) {
        bs->valid_key = 1;
        /* call the change callback now, we skipped it on open */
        bdrv_dev_change_media_cb(bs, true);
    }
    return ret;
}
1686 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1688 if (!bs->drv) {
1689 buf[0] = '\0';
1690 } else {
1691 pstrcpy(buf, buf_size, bs->drv->format_name);
1695 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1696 void *opaque)
1698 BlockDriver *drv;
1700 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1701 it(opaque, drv->format_name);
1705 BlockDriverState *bdrv_find(const char *name)
1707 BlockDriverState *bs;
1709 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1710 if (!strcmp(name, bs->device_name)) {
1711 return bs;
1714 return NULL;
1717 BlockDriverState *bdrv_next(BlockDriverState *bs)
1719 if (!bs) {
1720 return QTAILQ_FIRST(&bdrv_states);
1722 return QTAILQ_NEXT(bs, list);
1725 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1727 BlockDriverState *bs;
1729 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1730 it(opaque, bs);
1734 const char *bdrv_get_device_name(BlockDriverState *bs)
1736 return bs->device_name;
/*
 * Flush pending writes to stable storage.  Honors BDRV_O_NO_FLUSH, prefers
 * the driver's async flush when running in a coroutine, then the synchronous
 * driver callback.  Returns 0 on success or a negative errno.
 */
int bdrv_flush(BlockDriverState *bs)
{
    if (bs->open_flags & BDRV_O_NO_FLUSH) {
        return 0;
    }

    if (bs->drv && bdrv_has_async_flush(bs->drv) && qemu_in_coroutine()) {
        return bdrv_co_flush_em(bs);
    }

    if (bs->drv && bs->drv->bdrv_flush) {
        return bs->drv->bdrv_flush(bs);
    }

    /*
     * Some block drivers always operate in either writethrough or unsafe mode
     * and don't support bdrv_flush therefore. Usually qemu doesn't know how
     * the server works (because the behaviour is hardcoded or depends on
     * server-side configuration), so we can't ensure that everything is safe
     * on disk. Returning an error doesn't work because that would break guests
     * even if the server operates in writethrough mode.
     *
     * Let's hope the user knows what he's doing.
     */
    return 0;
}
1766 void bdrv_flush_all(void)
1768 BlockDriverState *bs;
1770 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1771 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1772 bdrv_flush(bs);
1777 int bdrv_has_zero_init(BlockDriverState *bs)
1779 assert(bs->drv);
1781 if (bs->drv->bdrv_has_zero_init) {
1782 return bs->drv->bdrv_has_zero_init(bs);
1785 return 1;
1788 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
1790 if (!bs->drv) {
1791 return -ENOMEDIUM;
1793 if (!bs->drv->bdrv_discard) {
1794 return 0;
1796 return bs->drv->bdrv_discard(bs, sector_num, nb_sectors);
1800 * Returns true iff the specified sector is present in the disk image. Drivers
1801 * not implementing the functionality are assumed to not support backing files,
1802 * hence all their sectors are reported as allocated.
1804 * 'pnum' is set to the number of sectors (including and immediately following
1805 * the specified sector) that are known to be in the same
1806 * allocated/unallocated state.
1808 * 'nb_sectors' is the max value 'pnum' should be set to.
1810 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1811 int *pnum)
1813 int64_t n;
1814 if (!bs->drv->bdrv_is_allocated) {
1815 if (sector_num >= bs->total_sectors) {
1816 *pnum = 0;
1817 return 0;
1819 n = bs->total_sectors - sector_num;
1820 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1821 return 1;
1823 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
/*
 * Emit a QEVENT_BLOCK_IO_ERROR monitor event describing the action taken
 * ("report"/"ignore"/"stop") for a failed read or write on 'bdrv'.
 */
void bdrv_mon_event(const BlockDriverState *bdrv,
                    BlockMonEventAction action, int is_read)
{
    QObject *data;
    const char *action_str;

    switch (action) {
    case BDRV_ACTION_REPORT:
        action_str = "report";
        break;
    case BDRV_ACTION_IGNORE:
        action_str = "ignore";
        break;
    case BDRV_ACTION_STOP:
        action_str = "stop";
        break;
    default:
        /* unknown action is a programming error */
        abort();
    }

    data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
                              bdrv->device_name,
                              action_str,
                              is_read ? "read" : "write");
    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);

    qobject_decref(data);
}
/*
 * qlist_iter() callback for "info block": print one human-readable line
 * for the per-device QDict built by bdrv_info().  'opaque' is the Monitor.
 */
static void bdrv_print_dict(QObject *obj, void *opaque)
{
    QDict *bs_dict;
    Monitor *mon = opaque;

    bs_dict = qobject_to_qdict(obj);

    monitor_printf(mon, "%s: removable=%d",
                        qdict_get_str(bs_dict, "device"),
                        qdict_get_bool(bs_dict, "removable"));

    /* lock and tray state only exist for removable media */
    if (qdict_get_bool(bs_dict, "removable")) {
        monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
        monitor_printf(mon, " tray-open=%d",
                       qdict_get_bool(bs_dict, "tray-open"));
    }

    if (qdict_haskey(bs_dict, "io-status")) {
        monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
    }

    /* "inserted" is present only when a medium (bs->drv) is attached */
    if (qdict_haskey(bs_dict, "inserted")) {
        QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));

        monitor_printf(mon, " file=");
        monitor_print_filename(mon, qdict_get_str(qdict, "file"));
        if (qdict_haskey(qdict, "backing_file")) {
            monitor_printf(mon, " backing_file=");
            monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
        }
        monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
                            qdict_get_bool(qdict, "ro"),
                            qdict_get_str(qdict, "drv"),
                            qdict_get_bool(qdict, "encrypted"));
    } else {
        monitor_printf(mon, " [not inserted]");
    }

    monitor_printf(mon, "\n");
}
1896 void bdrv_info_print(Monitor *mon, const QObject *data)
1898 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
/* Wire names for the BDRV_IOS_* I/O status codes, indexed by status. */
static const char *const io_status_name[BDRV_IOS_MAX] = {
    [BDRV_IOS_OK] = "ok",
    [BDRV_IOS_FAILED] = "failed",
    [BDRV_IOS_ENOSPC] = "nospace",
};
/*
 * Build the "query-block" QMP reply: a QList with one QDict per device,
 * containing removable/lock/tray state, optional io-status, and an
 * "inserted" sub-dict when a medium is present.
 */
void bdrv_info(Monitor *mon, QObject **ret_data)
{
    QList *bs_list;
    BlockDriverState *bs;

    bs_list = qlist_new();

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        QObject *bs_obj;
        QDict *bs_dict;

        bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
                                    "'removable': %i, 'locked': %i }",
                                    bs->device_name,
                                    bdrv_dev_has_removable_media(bs),
                                    bdrv_dev_is_medium_locked(bs));
        bs_dict = qobject_to_qdict(bs_obj);

        /* tray state only makes sense for removable media */
        if (bdrv_dev_has_removable_media(bs)) {
            qdict_put(bs_dict, "tray-open",
                      qbool_from_int(bdrv_dev_is_tray_open(bs)));
        }

        if (bdrv_iostatus_is_enabled(bs)) {
            qdict_put(bs_dict, "io-status",
                      qstring_from_str(io_status_name[bs->iostatus]));
        }

        /* a non-NULL drv means a medium is inserted */
        if (bs->drv) {
            QObject *obj;

            obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
                                     "'encrypted': %i }",
                                     bs->filename, bs->read_only,
                                     bs->drv->format_name,
                                     bdrv_is_encrypted(bs));
            if (bs->backing_file[0] != '\0') {
                QDict *qdict = qobject_to_qdict(obj);
                qdict_put(qdict, "backing_file",
                          qstring_from_str(bs->backing_file));
            }

            qdict_put_obj(bs_dict, "inserted", obj);
        }
        qlist_append_obj(bs_list, bs_obj);
    }

    *ret_data = QOBJECT(bs_list);
}
/*
 * qlist_iter() callback for "info blockstats": print the per-device stats
 * QDict built by bdrv_info_stats_bs().  'opaque' is the Monitor.
 */
static void bdrv_stats_iter(QObject *data, void *opaque)
{
    QDict *qdict;
    Monitor *mon = opaque;

    qdict = qobject_to_qdict(data);
    monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));

    /* descend into the nested "stats" dict */
    qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
    monitor_printf(mon, " rd_bytes=%" PRId64
                        " wr_bytes=%" PRId64
                        " rd_operations=%" PRId64
                        " wr_operations=%" PRId64
                        " flush_operations=%" PRId64
                        " wr_total_time_ns=%" PRId64
                        " rd_total_time_ns=%" PRId64
                        " flush_total_time_ns=%" PRId64
                        "\n",
                        qdict_get_int(qdict, "rd_bytes"),
                        qdict_get_int(qdict, "wr_bytes"),
                        qdict_get_int(qdict, "rd_operations"),
                        qdict_get_int(qdict, "wr_operations"),
                        qdict_get_int(qdict, "flush_operations"),
                        qdict_get_int(qdict, "wr_total_time_ns"),
                        qdict_get_int(qdict, "rd_total_time_ns"),
                        qdict_get_int(qdict, "flush_total_time_ns"));
}
1985 void bdrv_stats_print(Monitor *mon, const QObject *data)
1987 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
/*
 * Build the stats QDict for one BlockDriverState.  Recurses into bs->file
 * so protocol-level statistics appear under "parent".
 */
static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
{
    QObject *res;
    QDict *dict;

    res = qobject_from_jsonf("{ 'stats': {"
                             "'rd_bytes': %" PRId64 ","
                             "'wr_bytes': %" PRId64 ","
                             "'rd_operations': %" PRId64 ","
                             "'wr_operations': %" PRId64 ","
                             "'wr_highest_offset': %" PRId64 ","
                             "'flush_operations': %" PRId64 ","
                             "'wr_total_time_ns': %" PRId64 ","
                             "'rd_total_time_ns': %" PRId64 ","
                             "'flush_total_time_ns': %" PRId64
                             "} }",
                             bs->nr_bytes[BDRV_ACCT_READ],
                             bs->nr_bytes[BDRV_ACCT_WRITE],
                             bs->nr_ops[BDRV_ACCT_READ],
                             bs->nr_ops[BDRV_ACCT_WRITE],
                             /* highest written sector, converted to bytes */
                             bs->wr_highest_sector *
                             (uint64_t)BDRV_SECTOR_SIZE,
                             bs->nr_ops[BDRV_ACCT_FLUSH],
                             bs->total_time_ns[BDRV_ACCT_WRITE],
                             bs->total_time_ns[BDRV_ACCT_READ],
                             bs->total_time_ns[BDRV_ACCT_FLUSH]);
    dict  = qobject_to_qdict(res);

    /* anonymous (internal) states have an empty device name */
    if (*bs->device_name) {
        qdict_put(dict, "device", qstring_from_str(bs->device_name));
    }

    if (bs->file) {
        QObject *parent = bdrv_info_stats_bs(bs->file);
        qdict_put_obj(dict, "parent", parent);
    }

    return res;
}
2030 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2032 QObject *obj;
2033 QList *devices;
2034 BlockDriverState *bs;
2036 devices = qlist_new();
2038 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2039 obj = bdrv_info_stats_bs(bs);
2040 qlist_append_obj(devices, obj);
2043 *ret_data = QOBJECT(devices);
2046 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2048 if (bs->backing_hd && bs->backing_hd->encrypted)
2049 return bs->backing_file;
2050 else if (bs->encrypted)
2051 return bs->filename;
2052 else
2053 return NULL;
2056 void bdrv_get_backing_filename(BlockDriverState *bs,
2057 char *filename, int filename_size)
2059 if (!bs->backing_file) {
2060 pstrcpy(filename, filename_size, "");
2061 } else {
2062 pstrcpy(filename, filename_size, bs->backing_file);
2066 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2067 const uint8_t *buf, int nb_sectors)
2069 BlockDriver *drv = bs->drv;
2070 if (!drv)
2071 return -ENOMEDIUM;
2072 if (!drv->bdrv_write_compressed)
2073 return -ENOTSUP;
2074 if (bdrv_check_request(bs, sector_num, nb_sectors))
2075 return -EIO;
2077 if (bs->dirty_bitmap) {
2078 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2081 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2084 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2086 BlockDriver *drv = bs->drv;
2087 if (!drv)
2088 return -ENOMEDIUM;
2089 if (!drv->bdrv_get_info)
2090 return -ENOTSUP;
2091 memset(bdi, 0, sizeof(*bdi));
2092 return drv->bdrv_get_info(bs, bdi);
2095 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2096 int64_t pos, int size)
2098 BlockDriver *drv = bs->drv;
2099 if (!drv)
2100 return -ENOMEDIUM;
2101 if (drv->bdrv_save_vmstate)
2102 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2103 if (bs->file)
2104 return bdrv_save_vmstate(bs->file, buf, pos, size);
2105 return -ENOTSUP;
2108 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2109 int64_t pos, int size)
2111 BlockDriver *drv = bs->drv;
2112 if (!drv)
2113 return -ENOMEDIUM;
2114 if (drv->bdrv_load_vmstate)
2115 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2116 if (bs->file)
2117 return bdrv_load_vmstate(bs->file, buf, pos, size);
2118 return -ENOTSUP;
2121 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2123 BlockDriver *drv = bs->drv;
2125 if (!drv || !drv->bdrv_debug_event) {
2126 return;
2129 return drv->bdrv_debug_event(bs, event);
2133 /**************************************************************/
2134 /* handling of snapshots */
2136 int bdrv_can_snapshot(BlockDriverState *bs)
2138 BlockDriver *drv = bs->drv;
2139 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2140 return 0;
2143 if (!drv->bdrv_snapshot_create) {
2144 if (bs->file != NULL) {
2145 return bdrv_can_snapshot(bs->file);
2147 return 0;
2150 return 1;
2153 int bdrv_is_snapshot(BlockDriverState *bs)
2155 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
/*
 * Return the device used for VM snapshots: the cached bs_snapshots if one
 * was found before, otherwise the first snapshot-capable device (which is
 * then cached).  NULL when no device supports snapshots.
 */
BlockDriverState *bdrv_snapshots(void)
{
    BlockDriverState *bs;

    if (bs_snapshots) {
        return bs_snapshots;
    }

    bs = NULL;
    while ((bs = bdrv_next(bs))) {
        if (bdrv_can_snapshot(bs)) {
            /* cache the result for subsequent calls */
            bs_snapshots = bs;
            return bs;
        }
    }
    return NULL;
}
2176 int bdrv_snapshot_create(BlockDriverState *bs,
2177 QEMUSnapshotInfo *sn_info)
2179 BlockDriver *drv = bs->drv;
2180 if (!drv)
2181 return -ENOMEDIUM;
2182 if (drv->bdrv_snapshot_create)
2183 return drv->bdrv_snapshot_create(bs, sn_info);
2184 if (bs->file)
2185 return bdrv_snapshot_create(bs->file, sn_info);
2186 return -ENOTSUP;
/*
 * Revert to an internal snapshot.  Without format-level support the format
 * driver is closed, the snapshot is applied on the protocol file, and the
 * format driver is reopened on top of the reverted data.
 * Returns 0 or a negative errno; on reopen failure bs is left without a
 * driver (bs->drv == NULL).
 */
int bdrv_snapshot_goto(BlockDriverState *bs,
                       const char *snapshot_id)
{
    BlockDriver *drv = bs->drv;
    int ret, open_ret;

    if (!drv)
        return -ENOMEDIUM;
    if (drv->bdrv_snapshot_goto)
        return drv->bdrv_snapshot_goto(bs, snapshot_id);

    if (bs->file) {
        /* the format layer must not see stale metadata while the protocol
         * file is rewound underneath it */
        drv->bdrv_close(bs);
        ret = bdrv_snapshot_goto(bs->file, snapshot_id);
        open_ret = drv->bdrv_open(bs, bs->open_flags);
        if (open_ret < 0) {
            bdrv_delete(bs->file);
            bs->drv = NULL;
            return open_ret;
        }
        return ret;
    }

    return -ENOTSUP;
}
2215 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2217 BlockDriver *drv = bs->drv;
2218 if (!drv)
2219 return -ENOMEDIUM;
2220 if (drv->bdrv_snapshot_delete)
2221 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2222 if (bs->file)
2223 return bdrv_snapshot_delete(bs->file, snapshot_id);
2224 return -ENOTSUP;
2227 int bdrv_snapshot_list(BlockDriverState *bs,
2228 QEMUSnapshotInfo **psn_info)
2230 BlockDriver *drv = bs->drv;
2231 if (!drv)
2232 return -ENOMEDIUM;
2233 if (drv->bdrv_snapshot_list)
2234 return drv->bdrv_snapshot_list(bs, psn_info);
2235 if (bs->file)
2236 return bdrv_snapshot_list(bs->file, psn_info);
2237 return -ENOTSUP;
2240 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2241 const char *snapshot_name)
2243 BlockDriver *drv = bs->drv;
2244 if (!drv) {
2245 return -ENOMEDIUM;
2247 if (!bs->read_only) {
2248 return -EINVAL;
2250 if (drv->bdrv_snapshot_load_tmp) {
2251 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2253 return -ENOTSUP;
#define NB_SUFFIXES 4

/*
 * Format a byte count into buf using at most four significant digits and a
 * K/M/G/T suffix (powers of 1024); values up to 999 are printed verbatim.
 * Returns buf.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base = 1024;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0; i < NB_SUFFIXES; i++) {
        if (size < 10 * base) {
            /* fewer than two integer digits: show one decimal place */
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base, suffixes[i]);
            break;
        } else if (size < 1000 * base || i == NB_SUFFIXES - 1) {
            /* round to the nearest unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     (size + (base >> 1)) / base, suffixes[i]);
            break;
        }
        base *= 1024;
    }

    return buf;
}
/*
 * Format one table row describing snapshot 'sn' into buf; with sn == NULL,
 * format the column-header row instead.  Returns buf.
 */
char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
{
    char buf1[128], date_buf[128], clock_buf[128];
#ifdef _WIN32
    struct tm *ptm;
#else
    struct tm tm;
#endif
    time_t ti;
    int64_t secs;

    if (!sn) {
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
    } else {
        ti = sn->date_sec;
#ifdef _WIN32
        /* Win32 lacks localtime_r; localtime's static buffer must do */
        ptm = localtime(&ti);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", ptm);
#else
        localtime_r(&ti, &tm);
        strftime(date_buf, sizeof(date_buf),
                 "%Y-%m-%d %H:%M:%S", &tm);
#endif
        /* render the VM clock as HH:MM:SS.mmm */
        secs = sn->vm_clock_nsec / 1000000000;
        snprintf(clock_buf, sizeof(clock_buf),
                 "%02d:%02d:%02d.%03d",
                 (int)(secs / 3600),
                 (int)((secs / 60) % 60),
                 (int)(secs % 60),
                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
        snprintf(buf, buf_size,
                 "%-10s%-20s%7s%20s%15s",
                 sn->id_str, sn->name,
                 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
                 date_buf,
                 clock_buf);
    }
    return buf;
}
2329 /**************************************************************/
2330 /* async I/Os */
2332 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2333 QEMUIOVector *qiov, int nb_sectors,
2334 BlockDriverCompletionFunc *cb, void *opaque)
2336 BlockDriver *drv = bs->drv;
2338 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2340 if (!drv)
2341 return NULL;
2342 if (bdrv_check_request(bs, sector_num, nb_sectors))
2343 return NULL;
2345 return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
2346 cb, opaque);
/* Bookkeeping used to update the dirty bitmap when an AIO write completes
 * before chaining to the caller's original callback. */
typedef struct BlockCompleteData {
    BlockDriverCompletionFunc *cb; /* caller's completion callback */
    void *opaque;                  /* caller's opaque argument */
    BlockDriverState *bs;
    int64_t sector_num;            /* written range, for the dirty bitmap */
    int nb_sectors;
} BlockCompleteData;
2357 static void block_complete_cb(void *opaque, int ret)
2359 BlockCompleteData *b = opaque;
2361 if (b->bs->dirty_bitmap) {
2362 set_dirty_bitmap(b->bs, b->sector_num, b->nb_sectors, 1);
2364 b->cb(b->opaque, ret);
2365 g_free(b);
2368 static BlockCompleteData *blk_dirty_cb_alloc(BlockDriverState *bs,
2369 int64_t sector_num,
2370 int nb_sectors,
2371 BlockDriverCompletionFunc *cb,
2372 void *opaque)
2374 BlockCompleteData *blkdata = g_malloc0(sizeof(BlockCompleteData));
2376 blkdata->bs = bs;
2377 blkdata->cb = cb;
2378 blkdata->opaque = opaque;
2379 blkdata->sector_num = sector_num;
2380 blkdata->nb_sectors = nb_sectors;
2382 return blkdata;
/*
 * Submit an asynchronous write.  When dirty tracking is active the caller's
 * callback is wrapped by block_complete_cb() so the bitmap is updated on
 * completion.  Returns the AIOCB or NULL on invalid request, read-only
 * device, or missing medium.
 */
BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
                                  QEMUIOVector *qiov, int nb_sectors,
                                  BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriver *drv = bs->drv;
    BlockDriverAIOCB *ret;
    BlockCompleteData *blk_cb_data;

    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);

    if (!drv)
        return NULL;
    if (bs->read_only)
        return NULL;
    if (bdrv_check_request(bs, sector_num, nb_sectors))
        return NULL;

    if (bs->dirty_bitmap) {
        /* interpose our completion shim; it frees blk_cb_data itself */
        blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
                                         opaque);
        cb = &block_complete_cb;
        opaque = blk_cb_data;
    }

    ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
                               cb, opaque);

    if (ret) {
        /* track the highest sector ever written */
        if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
            bs->wr_highest_sector = sector_num + nb_sectors - 1;
        }
    }

    return ret;
}
/* Shared completion state for one bdrv_aio_multiwrite() batch: completes
 * the user once all merged sub-requests have finished. */
typedef struct MultiwriteCB {
    int error;          /* first error seen, 0 if none */
    int num_requests;   /* submitted sub-requests still in flight */
    int num_callbacks;  /* original (pre-merge) request count */
    struct {
        BlockDriverCompletionFunc *cb;
        void *opaque;
        QEMUIOVector *free_qiov; /* merged iovec to destroy, or NULL */
        void *free_buf;          /* zero-fill buffer to release, or NULL */
    } callbacks[];
} MultiwriteCB;
2434 static void multiwrite_user_cb(MultiwriteCB *mcb)
2436 int i;
2438 for (i = 0; i < mcb->num_callbacks; i++) {
2439 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2440 if (mcb->callbacks[i].free_qiov) {
2441 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2443 g_free(mcb->callbacks[i].free_qiov);
2444 qemu_vfree(mcb->callbacks[i].free_buf);
2448 static void multiwrite_cb(void *opaque, int ret)
2450 MultiwriteCB *mcb = opaque;
2452 trace_multiwrite_cb(mcb, ret);
2454 if (ret < 0 && !mcb->error) {
2455 mcb->error = ret;
2458 mcb->num_requests--;
2459 if (mcb->num_requests == 0) {
2460 multiwrite_user_cb(mcb);
2461 g_free(mcb);
2465 static int multiwrite_req_compare(const void *a, const void *b)
2467 const BlockRequest *req1 = a, *req2 = b;
2470 * Note that we can't simply subtract req2->sector from req1->sector
2471 * here as that could overflow the return value.
2473 if (req1->sector > req2->sector) {
2474 return 1;
2475 } else if (req1->sector < req2->sector) {
2476 return -1;
2477 } else {
2478 return 0;
/*
 * Takes a bunch of requests and tries to merge them. Returns the number of
 * requests that remain after merging.
 */
static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
    int num_reqs, MultiwriteCB *mcb)
{
    int i, outidx;

    // Sort requests by start sector
    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);

    // Check if adjacent requests touch the same clusters. If so, combine them,
    // filling up gaps with zero sectors.
    outidx = 0;
    for (i = 1; i < num_reqs; i++) {
        int merge = 0;
        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;

        // This handles the cases that are valid for all block drivers, namely
        // exactly sequential writes and overlapping writes.
        if (reqs[i].sector <= oldreq_last) {
            merge = 1;
        }

        // The block driver may decide that it makes sense to combine requests
        // even if there is a gap of some sectors between them. In this case,
        // the gap is filled with zeros (therefore only applicable for yet
        // unused space in format like qcow2).
        if (!merge && bs->drv->bdrv_merge_requests) {
            merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
        }

        // never exceed the per-request iovec limit (+1 for a zero-fill gap)
        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
            merge = 0;
        }

        if (merge) {
            size_t size;
            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
            qemu_iovec_init(qiov,
                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);

            // Add the first request to the merged one. If the requests are
            // overlapping, drop the last sectors of the first request.
            size = (reqs[i].sector - reqs[outidx].sector) << 9;
            qemu_iovec_concat(qiov, reqs[outidx].qiov, size);

            // We might need to add some zeros between the two requests
            if (reqs[i].sector > oldreq_last) {
                size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
                uint8_t *buf = qemu_blockalign(bs, zero_bytes);
                memset(buf, 0, zero_bytes);
                qemu_iovec_add(qiov, buf, zero_bytes);
                // record for release in multiwrite_user_cb()
                mcb->callbacks[i].free_buf = buf;
            }

            // Add the second request
            qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);

            reqs[outidx].nb_sectors = qiov->size >> 9;
            reqs[outidx].qiov = qiov;

            // the merged iovec is owned by the batch, freed on completion
            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
        } else {
            outidx++;
            reqs[outidx].sector = reqs[i].sector;
            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
            reqs[outidx].qiov = reqs[i].qiov;
        }
    }

    return outidx + 1;
}
/*
 * Submit multiple AIO write requests at once.
 *
 * On success, the function returns 0 and all requests in the reqs array have
 * been submitted. In error case this function returns -1, and any of the
 * requests may or may not be submitted yet. In particular, this means that the
 * callback will be called for some of the requests, for others it won't. The
 * caller must check the error field of the BlockRequest to wait for the right
 * callbacks (if error != 0, no callback will be called).
 *
 * The implementation may modify the contents of the reqs array, e.g. to merge
 * requests. However, the fields opaque and error are left unmodified as they
 * are used to signal failure for a single request to the caller.
 */
int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
{
    BlockDriverAIOCB *acb;
    MultiwriteCB *mcb;
    int i;

    /* don't submit writes if we don't have a medium */
    if (bs->drv == NULL) {
        for (i = 0; i < num_reqs; i++) {
            reqs[i].error = -ENOMEDIUM;
        }
        return -1;
    }

    if (num_reqs == 0) {
        return 0;
    }

    // Create MultiwriteCB structure
    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
    mcb->num_requests = 0;
    mcb->num_callbacks = num_reqs;

    /* Save the per-request callbacks before merging: merged requests share a
     * single driver callback but must still report to each original caller. */
    for (i = 0; i < num_reqs; i++) {
        mcb->callbacks[i].cb = reqs[i].cb;
        mcb->callbacks[i].opaque = reqs[i].opaque;
    }

    // Check for mergable requests
    num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);

    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);

    /*
     * Run the aio requests. As soon as one request can't be submitted
     * successfully, fail all requests that are not yet submitted (we must
     * return failure for all requests anyway)
     *
     * num_requests cannot be set to the right value immediately: If
     * bdrv_aio_writev fails for some request, num_requests would be too high
     * and therefore multiwrite_cb() would never recognize the multiwrite
     * request as completed. We also cannot use the loop variable i to set it
     * when the first request fails because the callback may already have been
     * called for previously submitted requests. Thus, num_requests must be
     * incremented for each request that is submitted.
     *
     * The problem that callbacks may be called early also means that we need
     * to take care that num_requests doesn't become 0 before all requests are
     * submitted - multiwrite_cb() would consider the multiwrite request
     * completed. A dummy request that is "completed" by a manual call to
     * multiwrite_cb() takes care of this.
     */
    mcb->num_requests = 1;

    // Run the aio requests
    for (i = 0; i < num_reqs; i++) {
        mcb->num_requests++;
        acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
            reqs[i].nb_sectors, multiwrite_cb, mcb);

        if (acb == NULL) {
            // We can only fail the whole thing if no request has been
            // submitted yet. Otherwise we'll wait for the submitted AIOs to
            // complete and report the error in the callback.
            if (i == 0) {
                trace_bdrv_aio_multiwrite_earlyfail(mcb);
                goto fail;
            } else {
                trace_bdrv_aio_multiwrite_latefail(mcb, i);
                multiwrite_cb(mcb, -EIO);
                break;
            }
        }
    }

    /* Complete the dummy request */
    multiwrite_cb(mcb, 0);

    return 0;

fail:
    for (i = 0; i < mcb->num_callbacks; i++) {
        reqs[i].error = -EIO;
    }
    g_free(mcb);
    return -1;
}
2659 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2660 BlockDriverCompletionFunc *cb, void *opaque)
2662 BlockDriver *drv = bs->drv;
2664 trace_bdrv_aio_flush(bs, opaque);
2666 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2667 return bdrv_aio_noop_em(bs, cb, opaque);
2670 if (!drv)
2671 return NULL;
2672 return drv->bdrv_aio_flush(bs, cb, opaque);
/* Cancel an in-flight AIO request by dispatching to its pool's cancel hook. */
void bdrv_aio_cancel(BlockDriverAIOCB *acb)
{
    acb->pool->cancel(acb);
}
/**************************************************************/
/* async block device emulation */

/* AIOCB used when emulating AIO on top of a driver's synchronous
 * bdrv_read/bdrv_write: the I/O is performed immediately and the
 * completion callback is deferred to a bottom half. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;
    QEMUBH *bh;          /* bottom half that delivers the completion */
    int ret;             /* result handed to the completion callback */
    /* vector translation state */
    QEMUIOVector *qiov;  /* caller's scatter/gather list (NULL for flush) */
    uint8_t *bounce;     /* linear bounce buffer backing qiov */
    int is_write;        /* nonzero: no copy-back into qiov on completion */
} BlockDriverAIOCBSync;
2694 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2696 BlockDriverAIOCBSync *acb =
2697 container_of(blockacb, BlockDriverAIOCBSync, common);
2698 qemu_bh_delete(acb->bh);
2699 acb->bh = NULL;
2700 qemu_aio_release(acb);
/* AIOCB pool for the synchronous emulation path. */
static AIOPool bdrv_em_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBSync),
    .cancel = bdrv_aio_cancel_em,
};
/* Bottom half: deliver the completion of an emulated AIO request.
 * For reads, copy the bounce buffer back into the caller's qiov first. */
static void bdrv_aio_bh_cb(void *opaque)
{
    BlockDriverAIOCBSync *acb = opaque;

    if (!acb->is_write)
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
    qemu_vfree(acb->bounce);
    /* run the caller's callback before tearing down the AIOCB */
    acb->common.cb(acb->common.opaque, acb->ret);
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;
    qemu_aio_release(acb);
}
/* Emulate an AIO read/write on top of the driver's synchronous
 * bdrv_read/bdrv_write: the I/O happens immediately through a linear
 * bounce buffer; completion is reported later from a bottom half. */
static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque,
                                            int is_write)

{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = is_write;
    acb->qiov = qiov;
    /* linearize the caller's scatter/gather list into one buffer */
    acb->bounce = qemu_blockalign(bs, qiov->size);

    /* recycled AIOCBs may already carry a bottom half */
    if (!acb->bh)
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    if (is_write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
        acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
    } else {
        acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
    }

    qemu_bh_schedule(acb->bh);

    return &acb->common;
}
2752 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2753 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2754 BlockDriverCompletionFunc *cb, void *opaque)
2756 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2759 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2760 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2761 BlockDriverCompletionFunc *cb, void *opaque)
2763 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
/* AIOCB used when emulating AIO on top of a driver's native coroutine
 * interface (bdrv_co_readv/bdrv_co_writev). */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;
    BlockRequest req;   /* request parameters and, afterwards, req.error */
    bool is_write;      /* read vs. write dispatch in bdrv_co_rw() */
    QEMUBH* bh;         /* bottom half that delivers the completion */
} BlockDriverAIOCBCoroutine;
/* Crude cancel for coroutine-based AIO: drain ALL outstanding AIO (which
 * necessarily includes this request) rather than cancelling just one. */
static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
{
    qemu_aio_flush();
}
/* AIOCB pool for the coroutine emulation path. */
static AIOPool bdrv_em_co_aio_pool = {
    .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
    .cancel = bdrv_aio_co_cancel_em,
};
/* Bottom half: report the coroutine request's result (req.error) to the
 * caller's callback, then free the bh and the AIOCB. */
static void bdrv_co_rw_bh(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;

    acb->common.cb(acb->common.opaque, acb->req.error);
    qemu_bh_delete(acb->bh);
    qemu_aio_release(acb);
}
/* Coroutine entry point for bdrv_co_aio_rw_vector(): run the request
 * through the driver's native coroutine interface, then schedule a BH so
 * the completion callback runs outside coroutine context. */
static void coroutine_fn bdrv_co_rw(void *opaque)
{
    BlockDriverAIOCBCoroutine *acb = opaque;
    BlockDriverState *bs = acb->common.bs;

    if (!acb->is_write) {
        acb->req.error = bs->drv->bdrv_co_readv(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    } else {
        acb->req.error = bs->drv->bdrv_co_writev(bs, acb->req.sector,
            acb->req.nb_sectors, acb->req.qiov);
    }

    acb->bh = qemu_bh_new(bdrv_co_rw_bh, acb);
    qemu_bh_schedule(acb->bh);
}
2810 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2811 int64_t sector_num,
2812 QEMUIOVector *qiov,
2813 int nb_sectors,
2814 BlockDriverCompletionFunc *cb,
2815 void *opaque,
2816 bool is_write)
2818 Coroutine *co;
2819 BlockDriverAIOCBCoroutine *acb;
2821 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2822 acb->req.sector = sector_num;
2823 acb->req.nb_sectors = nb_sectors;
2824 acb->req.qiov = qiov;
2825 acb->is_write = is_write;
2827 co = qemu_coroutine_create(bdrv_co_rw);
2828 qemu_coroutine_enter(co, acb);
2830 return &acb->common;
2833 static BlockDriverAIOCB *bdrv_co_aio_readv_em(BlockDriverState *bs,
2834 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2835 BlockDriverCompletionFunc *cb, void *opaque)
2837 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2838 false);
2841 static BlockDriverAIOCB *bdrv_co_aio_writev_em(BlockDriverState *bs,
2842 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2843 BlockDriverCompletionFunc *cb, void *opaque)
2845 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
2846 true);
/* Emulated AIO flush: flush synchronously, then report completion via a BH.
 * NOTE(review): the result of bdrv_flush() is discarded, so the callback
 * always sees ret == 0 — confirm whether flush errors should propagate. */
static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
        BlockDriverCompletionFunc *cb, void *opaque)
{
    BlockDriverAIOCBSync *acb;

    acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
    acb->is_write = 1; /* don't bounce in the completion handler */
    acb->qiov = NULL;
    acb->bounce = NULL;
    acb->ret = 0;

    if (!acb->bh)
        acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);

    bdrv_flush(bs);
    qemu_bh_schedule(acb->bh);
    return &acb->common;
}
2868 static BlockDriverAIOCB *bdrv_aio_noop_em(BlockDriverState *bs,
2869 BlockDriverCompletionFunc *cb, void *opaque)
2871 BlockDriverAIOCBSync *acb;
2873 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2874 acb->is_write = 1; /* don't bounce in the completion handler */
2875 acb->qiov = NULL;
2876 acb->bounce = NULL;
2877 acb->ret = 0;
2879 if (!acb->bh) {
2880 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2883 qemu_bh_schedule(acb->bh);
2884 return &acb->common;
/**************************************************************/
/* sync block device emulation */

/* Completion callback for the synchronous wrappers below: store the
 * request's result in the int that the waiting caller is polling. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
2895 #define NOT_DONE 0x7fffffff
2897 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
2898 uint8_t *buf, int nb_sectors)
2900 int async_ret;
2901 BlockDriverAIOCB *acb;
2902 struct iovec iov;
2903 QEMUIOVector qiov;
2905 async_ret = NOT_DONE;
2906 iov.iov_base = (void *)buf;
2907 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2908 qemu_iovec_init_external(&qiov, &iov, 1);
2910 acb = bs->drv->bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
2911 bdrv_rw_em_cb, &async_ret);
2912 if (acb == NULL) {
2913 async_ret = -1;
2914 goto fail;
2917 while (async_ret == NOT_DONE) {
2918 qemu_aio_wait();
2922 fail:
2923 return async_ret;
2926 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
2927 const uint8_t *buf, int nb_sectors)
2929 int async_ret;
2930 BlockDriverAIOCB *acb;
2931 struct iovec iov;
2932 QEMUIOVector qiov;
2934 async_ret = NOT_DONE;
2935 iov.iov_base = (void *)buf;
2936 iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
2937 qemu_iovec_init_external(&qiov, &iov, 1);
2939 acb = bs->drv->bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
2940 bdrv_rw_em_cb, &async_ret);
2941 if (acb == NULL) {
2942 async_ret = -1;
2943 goto fail;
2945 while (async_ret == NOT_DONE) {
2946 qemu_aio_wait();
2949 fail:
2950 return async_ret;
/* Register all built-in block drivers (runs the MODULE_INIT_BLOCK hooks). */
void bdrv_init(void)
{
    module_call_init(MODULE_INIT_BLOCK);
}
/* Like bdrv_init(), but restrict format probing/selection to the
 * compiled-in driver whitelist. */
void bdrv_init_with_whitelist(void)
{
    use_bdrv_whitelist = 1;
    bdrv_init();
}
2964 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2965 BlockDriverCompletionFunc *cb, void *opaque)
2967 BlockDriverAIOCB *acb;
2969 if (pool->free_aiocb) {
2970 acb = pool->free_aiocb;
2971 pool->free_aiocb = acb->next;
2972 } else {
2973 acb = g_malloc0(pool->aiocb_size);
2974 acb->pool = pool;
2976 acb->bs = bs;
2977 acb->cb = cb;
2978 acb->opaque = opaque;
2979 return acb;
2982 void qemu_aio_release(void *p)
2984 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2985 AIOPool *pool = acb->pool;
2986 acb->next = pool->free_aiocb;
2987 pool->free_aiocb = acb;
/**************************************************************/
/* Coroutine block device emulation */

/* Rendezvous between a yielded coroutine and the AIO completion that
 * resumes it. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter on completion */
    int ret;               /* request result, filled in by the callback */
} CoroutineIOCompletion;
/* AIO completion callback: record the result and resume the coroutine
 * waiting in bdrv_co_io_em() / bdrv_co_flush_em(). */
static void bdrv_co_io_em_complete(void *opaque, int ret)
{
    CoroutineIOCompletion *co = opaque;

    co->ret = ret;
    qemu_coroutine_enter(co->coroutine, NULL);
}
/* Emulate coroutine I/O on top of the driver's AIO interface: submit the
 * request with a callback that re-enters this coroutine, then yield until
 * it fires.  Returns the request result, or -EIO if submission failed. */
static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *iov,
                                      bool is_write)
{
    CoroutineIOCompletion co = {
        .coroutine = qemu_coroutine_self(),
    };
    BlockDriverAIOCB *acb;

    if (is_write) {
        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
                                       bdrv_co_io_em_complete, &co);
    } else {
        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
                                      bdrv_co_io_em_complete, &co);
    }

    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
    if (!acb) {
        return -EIO;
    }
    /* suspend until bdrv_co_io_em_complete() re-enters us */
    qemu_coroutine_yield();

    return co.ret;
}
3032 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
3033 int64_t sector_num, int nb_sectors,
3034 QEMUIOVector *iov)
3036 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
3039 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
3040 int64_t sector_num, int nb_sectors,
3041 QEMUIOVector *iov)
3043 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
3046 static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs)
3048 CoroutineIOCompletion co = {
3049 .coroutine = qemu_coroutine_self(),
3051 BlockDriverAIOCB *acb;
3053 acb = bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
3054 if (!acb) {
3055 return -EIO;
3057 qemu_coroutine_yield();
3058 return co.ret;
3061 /**************************************************************/
3062 /* removable device support */
3065 * Return TRUE if the media is present
3067 int bdrv_is_inserted(BlockDriverState *bs)
3069 BlockDriver *drv = bs->drv;
3071 if (!drv)
3072 return 0;
3073 if (!drv->bdrv_is_inserted)
3074 return 1;
3075 return drv->bdrv_is_inserted(bs);
3079 * Return whether the media changed since the last call to this
3080 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3082 int bdrv_media_changed(BlockDriverState *bs)
3084 BlockDriver *drv = bs->drv;
3086 if (drv && drv->bdrv_media_changed) {
3087 return drv->bdrv_media_changed(bs);
3089 return -ENOTSUP;
3093 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3095 void bdrv_eject(BlockDriverState *bs, int eject_flag)
3097 BlockDriver *drv = bs->drv;
3099 if (drv && drv->bdrv_eject) {
3100 drv->bdrv_eject(bs, eject_flag);
3105 * Lock or unlock the media (if it is locked, the user won't be able
3106 * to eject it manually).
3108 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3110 BlockDriver *drv = bs->drv;
3112 trace_bdrv_lock_medium(bs, locked);
3114 if (drv && drv->bdrv_lock_medium) {
3115 drv->bdrv_lock_medium(bs, locked);
3119 /* needed for generic scsi interface */
3121 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3123 BlockDriver *drv = bs->drv;
3125 if (drv && drv->bdrv_ioctl)
3126 return drv->bdrv_ioctl(bs, req, buf);
3127 return -ENOTSUP;
3130 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3131 unsigned long int req, void *buf,
3132 BlockDriverCompletionFunc *cb, void *opaque)
3134 BlockDriver *drv = bs->drv;
3136 if (drv && drv->bdrv_aio_ioctl)
3137 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3138 return NULL;
/* Set the buffer alignment requirement used by qemu_blockalign(). */
void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
{
    bs->buffer_alignment = align;
}
3146 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3148 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
/* Enable or disable dirty-sector tracking on bs.  Enabling allocates a
 * bitmap with one bit per BDRV_SECTORS_PER_DIRTY_CHUNK sectors (rounded
 * up); disabling frees it.  dirty_count is reset in both cases. */
void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
{
    int64_t bitmap_size;

    bs->dirty_count = 0;
    if (enable) {
        if (!bs->dirty_bitmap) {
            /* bytes needed: one bit per chunk, rounded up to whole bytes */
            bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
                    BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
            bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

            bs->dirty_bitmap = g_malloc0(bitmap_size);
        }
    } else {
        if (bs->dirty_bitmap) {
            g_free(bs->dirty_bitmap);
            bs->dirty_bitmap = NULL;
        }
    }
}
3172 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3174 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3176 if (bs->dirty_bitmap &&
3177 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3178 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3179 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3180 } else {
3181 return 0;
/* Clear the dirty bits covering [cur_sector, cur_sector + nr_sectors). */
void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                      int nr_sectors)
{
    set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
}
/* Return the current number of dirty chunks tracked for bs. */
int64_t bdrv_get_dirty_count(BlockDriverState *bs)
{
    return bs->dirty_count;
}
/* Mark/unmark bs as in use; asserts against redundant transitions. */
void bdrv_set_in_use(BlockDriverState *bs, int in_use)
{
    assert(bs->in_use != in_use);
    bs->in_use = in_use;
}
/* Return whether bs is currently marked in use. */
int bdrv_in_use(BlockDriverState *bs)
{
    return bs->in_use;
}
/* Enable I/O status reporting by moving out of the INVAL state. */
void bdrv_iostatus_enable(BlockDriverState *bs)
{
    bs->iostatus = BDRV_IOS_OK;
}
3212 /* The I/O status is only enabled if the drive explicitly
3213 * enables it _and_ the VM is configured to stop on errors */
3214 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3216 return (bs->iostatus != BDRV_IOS_INVAL &&
3217 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3218 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3219 bs->on_read_error == BLOCK_ERR_STOP_ANY));
/* Disable I/O status reporting for bs. */
void bdrv_iostatus_disable(BlockDriverState *bs)
{
    bs->iostatus = BDRV_IOS_INVAL;
}
/* Clear any recorded error state, but only if reporting is enabled. */
void bdrv_iostatus_reset(BlockDriverState *bs)
{
    if (bdrv_iostatus_is_enabled(bs)) {
        bs->iostatus = BDRV_IOS_OK;
    }
}
3234 /* XXX: Today this is set by device models because it makes the implementation
3235 quite simple. However, the block layer knows about the error, so it's
3236 possible to implement this without device models being involved */
3237 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3239 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3240 assert(error >= 0);
3241 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
/* Begin accounting for one I/O operation of the given type: record the
 * byte count and a start timestamp into the caller-provided cookie. */
void
bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
        enum BlockAcctType type)
{
    assert(type < BDRV_MAX_IOTYPE);

    cookie->bytes = bytes;
    cookie->start_time_ns = get_clock();
    cookie->type = type;
}
/* Finish accounting for the operation started with bdrv_acct_start():
 * accumulate bytes, operation count and elapsed time into bs's totals. */
void
bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
{
    assert(cookie->type < BDRV_MAX_IOTYPE);

    bs->nr_bytes[cookie->type] += cookie->bytes;
    bs->nr_ops[cookie->type]++;
    bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
}
3266 int bdrv_img_create(const char *filename, const char *fmt,
3267 const char *base_filename, const char *base_fmt,
3268 char *options, uint64_t img_size, int flags)
3270 QEMUOptionParameter *param = NULL, *create_options = NULL;
3271 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3272 BlockDriverState *bs = NULL;
3273 BlockDriver *drv, *proto_drv;
3274 BlockDriver *backing_drv = NULL;
3275 int ret = 0;
3277 /* Find driver and parse its options */
3278 drv = bdrv_find_format(fmt);
3279 if (!drv) {
3280 error_report("Unknown file format '%s'", fmt);
3281 ret = -EINVAL;
3282 goto out;
3285 proto_drv = bdrv_find_protocol(filename);
3286 if (!proto_drv) {
3287 error_report("Unknown protocol '%s'", filename);
3288 ret = -EINVAL;
3289 goto out;
3292 create_options = append_option_parameters(create_options,
3293 drv->create_options);
3294 create_options = append_option_parameters(create_options,
3295 proto_drv->create_options);
3297 /* Create parameter list with default values */
3298 param = parse_option_parameters("", create_options, param);
3300 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3302 /* Parse -o options */
3303 if (options) {
3304 param = parse_option_parameters(options, create_options, param);
3305 if (param == NULL) {
3306 error_report("Invalid options for file format '%s'.", fmt);
3307 ret = -EINVAL;
3308 goto out;
3312 if (base_filename) {
3313 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3314 base_filename)) {
3315 error_report("Backing file not supported for file format '%s'",
3316 fmt);
3317 ret = -EINVAL;
3318 goto out;
3322 if (base_fmt) {
3323 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3324 error_report("Backing file format not supported for file "
3325 "format '%s'", fmt);
3326 ret = -EINVAL;
3327 goto out;
3331 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3332 if (backing_file && backing_file->value.s) {
3333 if (!strcmp(filename, backing_file->value.s)) {
3334 error_report("Error: Trying to create an image with the "
3335 "same filename as the backing file");
3336 ret = -EINVAL;
3337 goto out;
3341 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3342 if (backing_fmt && backing_fmt->value.s) {
3343 backing_drv = bdrv_find_format(backing_fmt->value.s);
3344 if (!backing_drv) {
3345 error_report("Unknown backing file format '%s'",
3346 backing_fmt->value.s);
3347 ret = -EINVAL;
3348 goto out;
3352 // The size for the image must always be specified, with one exception:
3353 // If we are using a backing file, we can obtain the size from there
3354 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3355 if (size && size->value.n == -1) {
3356 if (backing_file && backing_file->value.s) {
3357 uint64_t size;
3358 char buf[32];
3360 bs = bdrv_new("");
3362 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3363 if (ret < 0) {
3364 error_report("Could not open '%s'", backing_file->value.s);
3365 goto out;
3367 bdrv_get_geometry(bs, &size);
3368 size *= 512;
3370 snprintf(buf, sizeof(buf), "%" PRId64, size);
3371 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3372 } else {
3373 error_report("Image creation needs a size parameter");
3374 ret = -EINVAL;
3375 goto out;
3379 printf("Formatting '%s', fmt=%s ", filename, fmt);
3380 print_option_parameters(param);
3381 puts("");
3383 ret = bdrv_create(drv, filename, param);
3385 if (ret < 0) {
3386 if (ret == -ENOTSUP) {
3387 error_report("Formatting or formatting option not supported for "
3388 "file format '%s'", fmt);
3389 } else if (ret == -EFBIG) {
3390 error_report("The image size is too large for file format '%s'",
3391 fmt);
3392 } else {
3393 error_report("%s: error while creating %s: %s", filename, fmt,
3394 strerror(-ret));
3398 out:
3399 free_option_parameters(create_options);
3400 free_option_parameters(param);
3402 if (bs) {
3403 bdrv_delete(bs);
3406 return ret;