e1000: Use PCI DMA stub functions
[qemu.git] / block.c
blob9bb236c98940f7f4d294c52ded605f1972062759
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qjson.h"
31 #include "qemu-coroutine.h"
32 #include "qmp-commands.h"
34 #ifdef CONFIG_BSD
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/ioctl.h>
38 #include <sys/queue.h>
39 #ifndef __DragonFly__
40 #include <sys/disk.h>
41 #endif
42 #endif
44 #ifdef _WIN32
45 #include <windows.h>
46 #endif
48 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
50 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
51 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
52 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
53 BlockDriverCompletionFunc *cb, void *opaque);
54 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
55 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
56 BlockDriverCompletionFunc *cb, void *opaque);
57 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
58 int64_t sector_num, int nb_sectors,
59 QEMUIOVector *iov);
60 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
61 int64_t sector_num, int nb_sectors,
62 QEMUIOVector *iov);
63 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
64 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
65 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
66 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
67 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
68 int64_t sector_num,
69 QEMUIOVector *qiov,
70 int nb_sectors,
71 BlockDriverCompletionFunc *cb,
72 void *opaque,
73 bool is_write);
74 static void coroutine_fn bdrv_co_do_rw(void *opaque);
76 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
77 QTAILQ_HEAD_INITIALIZER(bdrv_states);
79 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
80 QLIST_HEAD_INITIALIZER(bdrv_drivers);
82 /* The device to use for VM snapshots */
83 static BlockDriverState *bs_snapshots;
85 /* If non-zero, use only whitelisted block drivers */
86 static int use_bdrv_whitelist;
88 #ifdef _WIN32
/*
 * Return 1 if filename begins with an ASCII letter immediately followed
 * by ':' (for example "c:" or "D:\dir"), 0 otherwise.
 */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_lower = (letter >= 'a' && letter <= 'z');
    const int is_upper = (letter >= 'A' && letter <= 'Z');

    if (!is_lower && !is_upper) {
        /* filename[1] is deliberately not inspected in this case */
        return 0;
    }
    return filename[1] == ':';
}
/*
 * Return 1 if filename names a Windows drive: either exactly a drive
 * letter plus ':' or a path starting with "\\.\" or "//./".
 * Return 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    /* bare drive specification, nothing after the colon */
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }

    /* device-namespace prefix, with backslashes or forward slashes */
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }

    return 0;
}
106 #endif
/*
 * Check whether the path carries a "<protocol>:" part.
 *
 * The test is simply whether a ':' occurs anywhere in the path; on Windows
 * hosts, drive specifications are excluded first so that "c:..." is not
 * mistaken for a protocol.
 */
static int path_has_protocol(const char *path)
{
    const char *colon;

#ifdef _WIN32
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    colon = strchr(path, ':');
    return colon != NULL;
}
/*
 * Return non-zero if path is absolute.
 *
 * If the path contains a ':', the decision is made on the character that
 * follows the first ':'; otherwise on the first character of the path.
 * On Windows hosts both '/' and '\' count as a root separator, and a path
 * that itself starts with either one is always absolute.
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif

    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;

#ifdef _WIN32
    return (*start == '/' || *start == '\\');
#else
    return (*start == '/');
#endif
}
/*
 * Build in dest (at most dest_size bytes, always NUL-terminated when
 * dest_size > 0) the location of filename.
 *
 * An absolute filename is copied unchanged.  Otherwise the leading part of
 * base_path, up to and including its last directory separator or its
 * "<protocol>:" prefix (whichever ends later), is copied and filename is
 * appended to it.  URLs are supported.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }

    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just past the first ':' of base_path, if there is one */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just past the last directory separator, if there is one */
    after_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *backslash = strrchr(base_path, '\\');
        if (!after_sep || backslash > after_sep) {
            after_sep = backslash;
        }
    }
#endif
    after_sep = after_sep ? after_sep + 1 : base_path;

    /* keep whichever prefix reaches further into base_path */
    if (after_sep > after_proto) {
        after_proto = after_sep;
    }

    prefix_len = after_proto - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
185 void bdrv_register(BlockDriver *bdrv)
187 /* Block drivers without coroutine functions need emulation */
188 if (!bdrv->bdrv_co_readv) {
189 bdrv->bdrv_co_readv = bdrv_co_readv_em;
190 bdrv->bdrv_co_writev = bdrv_co_writev_em;
192 /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
193 * the block driver lacks aio we need to emulate that too.
195 if (!bdrv->bdrv_aio_readv) {
196 /* add AIO emulation layer */
197 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
198 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
202 QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
205 /* create a new block device (by default it is empty) */
206 BlockDriverState *bdrv_new(const char *device_name)
208 BlockDriverState *bs;
210 bs = g_malloc0(sizeof(BlockDriverState));
211 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
212 if (device_name[0] != '\0') {
213 QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
215 bdrv_iostatus_disable(bs);
216 return bs;
219 BlockDriver *bdrv_find_format(const char *format_name)
221 BlockDriver *drv1;
222 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
223 if (!strcmp(drv1->format_name, format_name)) {
224 return drv1;
227 return NULL;
230 static int bdrv_is_whitelisted(BlockDriver *drv)
232 static const char *whitelist[] = {
233 CONFIG_BDRV_WHITELIST
235 const char **p;
237 if (!whitelist[0])
238 return 1; /* no whitelist, anything goes */
240 for (p = whitelist; *p; p++) {
241 if (!strcmp(drv->format_name, *p)) {
242 return 1;
245 return 0;
248 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
250 BlockDriver *drv = bdrv_find_format(format_name);
251 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
254 int bdrv_create(BlockDriver *drv, const char* filename,
255 QEMUOptionParameter *options)
257 if (!drv->bdrv_create)
258 return -ENOTSUP;
260 return drv->bdrv_create(filename, options);
263 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
265 BlockDriver *drv;
267 drv = bdrv_find_protocol(filename);
268 if (drv == NULL) {
269 return -ENOENT;
272 return bdrv_create(drv, filename, options);
#ifdef _WIN32
/*
 * Store the name of a newly created temporary file in filename.
 *
 * On failure filename is set to the empty string, so that a later attempt
 * to use it fails instead of operating on an arbitrary name.
 *
 * NOTE(review): GetTempFileName() is not told how large the output buffer
 * is; the buffer presumably has to hold at least MAX_PATH characters --
 * confirm that every caller passes size >= MAX_PATH.
 */
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    if (size <= 0) {
        return;
    }

    /* both calls report failure by returning 0 */
    if (!GetTempPath(MAX_PATH, temp_dir) ||
        !GetTempFileName(temp_dir, "qem", 0, filename)) {
        filename[0] = '\0';
    }
}
#else
/*
 * Store the name of a newly created temporary file in filename.
 *
 * The file is created with mkstemp() inside $TMPDIR (or /tmp when TMPDIR
 * is unset) and closed again right away; only the name is handed back.
 *
 * On failure (name does not fit into size bytes, or mkstemp() fails)
 * filename is set to the empty string, so that a later attempt to use it
 * fails instead of operating on the predictable, unexpanded template.
 */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    int written;
    const char *tmpdir;

    if (size <= 0) {
        return;
    }

    /* XXX: race condition possible (file is closed, then re-opened by name) */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }

    written = snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    if (written < 0 || written >= size) {
        /* a truncated template would lose its XXXXXX suffix */
        filename[0] = '\0';
        return;
    }

    fd = mkstemp(filename);
    if (fd < 0) {
        filename[0] = '\0';
        return;
    }
    close(fd);
}
#endif
299 * Detect host devices. By convention, /dev/cdrom[N] is always
300 * recognized as a host CDROM.
302 static BlockDriver *find_hdev_driver(const char *filename)
304 int score_max = 0, score;
305 BlockDriver *drv = NULL, *d;
307 QLIST_FOREACH(d, &bdrv_drivers, list) {
308 if (d->bdrv_probe_device) {
309 score = d->bdrv_probe_device(filename);
310 if (score > score_max) {
311 score_max = score;
312 drv = d;
317 return drv;
320 BlockDriver *bdrv_find_protocol(const char *filename)
322 BlockDriver *drv1;
323 char protocol[128];
324 int len;
325 const char *p;
327 /* TODO Drivers without bdrv_file_open must be specified explicitly */
330 * XXX(hch): we really should not let host device detection
331 * override an explicit protocol specification, but moving this
332 * later breaks access to device names with colons in them.
333 * Thanks to the brain-dead persistent naming schemes on udev-
334 * based Linux systems those actually are quite common.
336 drv1 = find_hdev_driver(filename);
337 if (drv1) {
338 return drv1;
341 if (!path_has_protocol(filename)) {
342 return bdrv_find_format("file");
344 p = strchr(filename, ':');
345 assert(p != NULL);
346 len = p - filename;
347 if (len > sizeof(protocol) - 1)
348 len = sizeof(protocol) - 1;
349 memcpy(protocol, filename, len);
350 protocol[len] = '\0';
351 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
352 if (drv1->protocol_name &&
353 !strcmp(drv1->protocol_name, protocol)) {
354 return drv1;
357 return NULL;
360 static int find_image_format(const char *filename, BlockDriver **pdrv)
362 int ret, score, score_max;
363 BlockDriver *drv1, *drv;
364 uint8_t buf[2048];
365 BlockDriverState *bs;
367 ret = bdrv_file_open(&bs, filename, 0);
368 if (ret < 0) {
369 *pdrv = NULL;
370 return ret;
373 /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
374 if (bs->sg || !bdrv_is_inserted(bs)) {
375 bdrv_delete(bs);
376 drv = bdrv_find_format("raw");
377 if (!drv) {
378 ret = -ENOENT;
380 *pdrv = drv;
381 return ret;
384 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
385 bdrv_delete(bs);
386 if (ret < 0) {
387 *pdrv = NULL;
388 return ret;
391 score_max = 0;
392 drv = NULL;
393 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
394 if (drv1->bdrv_probe) {
395 score = drv1->bdrv_probe(buf, ret, filename);
396 if (score > score_max) {
397 score_max = score;
398 drv = drv1;
402 if (!drv) {
403 ret = -ENOENT;
405 *pdrv = drv;
406 return ret;
410 * Set the current 'total_sectors' value
412 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
414 BlockDriver *drv = bs->drv;
416 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
417 if (bs->sg)
418 return 0;
420 /* query actual device if possible, otherwise just trust the hint */
421 if (drv->bdrv_getlength) {
422 int64_t length = drv->bdrv_getlength(bs);
423 if (length < 0) {
424 return length;
426 hint = length >> BDRV_SECTOR_BITS;
429 bs->total_sectors = hint;
430 return 0;
434 * Set open flags for a given cache mode
436 * Return 0 on success, -1 if the cache mode was invalid.
438 int bdrv_parse_cache_flags(const char *mode, int *flags)
440 *flags &= ~BDRV_O_CACHE_MASK;
442 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
443 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
444 } else if (!strcmp(mode, "directsync")) {
445 *flags |= BDRV_O_NOCACHE;
446 } else if (!strcmp(mode, "writeback")) {
447 *flags |= BDRV_O_CACHE_WB;
448 } else if (!strcmp(mode, "unsafe")) {
449 *flags |= BDRV_O_CACHE_WB;
450 *flags |= BDRV_O_NO_FLUSH;
451 } else if (!strcmp(mode, "writethrough")) {
452 /* this is the default */
453 } else {
454 return -1;
457 return 0;
461 * Common part for opening disk images and files
463 static int bdrv_open_common(BlockDriverState *bs, const char *filename,
464 int flags, BlockDriver *drv)
466 int ret, open_flags;
468 assert(drv != NULL);
470 trace_bdrv_open_common(bs, filename, flags, drv->format_name);
472 bs->file = NULL;
473 bs->total_sectors = 0;
474 bs->encrypted = 0;
475 bs->valid_key = 0;
476 bs->sg = 0;
477 bs->open_flags = flags;
478 bs->growable = 0;
479 bs->buffer_alignment = 512;
481 pstrcpy(bs->filename, sizeof(bs->filename), filename);
482 bs->backing_file[0] = '\0';
484 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
485 return -ENOTSUP;
488 bs->drv = drv;
489 bs->opaque = g_malloc0(drv->instance_size);
491 bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
494 * Clear flags that are internal to the block layer before opening the
495 * image.
497 open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
500 * Snapshots should be writable.
502 if (bs->is_temporary) {
503 open_flags |= BDRV_O_RDWR;
506 bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
508 /* Open the image, either directly or using a protocol */
509 if (drv->bdrv_file_open) {
510 ret = drv->bdrv_file_open(bs, filename, open_flags);
511 } else {
512 ret = bdrv_file_open(&bs->file, filename, open_flags);
513 if (ret >= 0) {
514 ret = drv->bdrv_open(bs, open_flags);
518 if (ret < 0) {
519 goto free_and_fail;
522 ret = refresh_total_sectors(bs, bs->total_sectors);
523 if (ret < 0) {
524 goto free_and_fail;
527 #ifndef _WIN32
528 if (bs->is_temporary) {
529 unlink(filename);
531 #endif
532 return 0;
534 free_and_fail:
535 if (bs->file) {
536 bdrv_delete(bs->file);
537 bs->file = NULL;
539 g_free(bs->opaque);
540 bs->opaque = NULL;
541 bs->drv = NULL;
542 return ret;
546 * Opens a file using a protocol (file, host_device, nbd, ...)
548 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
550 BlockDriverState *bs;
551 BlockDriver *drv;
552 int ret;
554 drv = bdrv_find_protocol(filename);
555 if (!drv) {
556 return -ENOENT;
559 bs = bdrv_new("");
560 ret = bdrv_open_common(bs, filename, flags, drv);
561 if (ret < 0) {
562 bdrv_delete(bs);
563 return ret;
565 bs->growable = 1;
566 *pbs = bs;
567 return 0;
571 * Opens a disk image (raw, qcow2, vmdk, ...)
573 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
574 BlockDriver *drv)
576 int ret;
577 char tmp_filename[PATH_MAX];
579 if (flags & BDRV_O_SNAPSHOT) {
580 BlockDriverState *bs1;
581 int64_t total_size;
582 int is_protocol = 0;
583 BlockDriver *bdrv_qcow2;
584 QEMUOptionParameter *options;
585 char backing_filename[PATH_MAX];
587 /* if snapshot, we create a temporary backing file and open it
588 instead of opening 'filename' directly */
590 /* if there is a backing file, use it */
591 bs1 = bdrv_new("");
592 ret = bdrv_open(bs1, filename, 0, drv);
593 if (ret < 0) {
594 bdrv_delete(bs1);
595 return ret;
597 total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;
599 if (bs1->drv && bs1->drv->protocol_name)
600 is_protocol = 1;
602 bdrv_delete(bs1);
604 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
606 /* Real path is meaningless for protocols */
607 if (is_protocol)
608 snprintf(backing_filename, sizeof(backing_filename),
609 "%s", filename);
610 else if (!realpath(filename, backing_filename))
611 return -errno;
613 bdrv_qcow2 = bdrv_find_format("qcow2");
614 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
616 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
617 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
618 if (drv) {
619 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
620 drv->format_name);
623 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
624 free_option_parameters(options);
625 if (ret < 0) {
626 return ret;
629 filename = tmp_filename;
630 drv = bdrv_qcow2;
631 bs->is_temporary = 1;
634 /* Find the right image format driver */
635 if (!drv) {
636 ret = find_image_format(filename, &drv);
639 if (!drv) {
640 goto unlink_and_fail;
643 /* Open the image */
644 ret = bdrv_open_common(bs, filename, flags, drv);
645 if (ret < 0) {
646 goto unlink_and_fail;
649 /* If there is a backing file, use it */
650 if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
651 char backing_filename[PATH_MAX];
652 int back_flags;
653 BlockDriver *back_drv = NULL;
655 bs->backing_hd = bdrv_new("");
657 if (path_has_protocol(bs->backing_file)) {
658 pstrcpy(backing_filename, sizeof(backing_filename),
659 bs->backing_file);
660 } else {
661 path_combine(backing_filename, sizeof(backing_filename),
662 filename, bs->backing_file);
665 if (bs->backing_format[0] != '\0') {
666 back_drv = bdrv_find_format(bs->backing_format);
669 /* backing files always opened read-only */
670 back_flags =
671 flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
673 ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
674 if (ret < 0) {
675 bdrv_close(bs);
676 return ret;
678 if (bs->is_temporary) {
679 bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
680 } else {
681 /* base image inherits from "parent" */
682 bs->backing_hd->keep_read_only = bs->keep_read_only;
686 if (!bdrv_key_required(bs)) {
687 bdrv_dev_change_media_cb(bs, true);
690 return 0;
692 unlink_and_fail:
693 if (bs->is_temporary) {
694 unlink(filename);
696 return ret;
699 void bdrv_close(BlockDriverState *bs)
701 if (bs->drv) {
702 if (bs == bs_snapshots) {
703 bs_snapshots = NULL;
705 if (bs->backing_hd) {
706 bdrv_delete(bs->backing_hd);
707 bs->backing_hd = NULL;
709 bs->drv->bdrv_close(bs);
710 g_free(bs->opaque);
711 #ifdef _WIN32
712 if (bs->is_temporary) {
713 unlink(bs->filename);
715 #endif
716 bs->opaque = NULL;
717 bs->drv = NULL;
719 if (bs->file != NULL) {
720 bdrv_close(bs->file);
723 bdrv_dev_change_media_cb(bs, false);
727 void bdrv_close_all(void)
729 BlockDriverState *bs;
731 QTAILQ_FOREACH(bs, &bdrv_states, list) {
732 bdrv_close(bs);
736 /* make a BlockDriverState anonymous by removing from bdrv_state list.
737 Also, NULL terminate the device_name to prevent double remove */
738 void bdrv_make_anon(BlockDriverState *bs)
740 if (bs->device_name[0] != '\0') {
741 QTAILQ_REMOVE(&bdrv_states, bs, list);
743 bs->device_name[0] = '\0';
746 void bdrv_delete(BlockDriverState *bs)
748 assert(!bs->dev);
750 /* remove from list, if necessary */
751 bdrv_make_anon(bs);
753 bdrv_close(bs);
754 if (bs->file != NULL) {
755 bdrv_delete(bs->file);
758 assert(bs != bs_snapshots);
759 g_free(bs);
762 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
763 /* TODO change to DeviceState *dev when all users are qdevified */
765 if (bs->dev) {
766 return -EBUSY;
768 bs->dev = dev;
769 bdrv_iostatus_reset(bs);
770 return 0;
773 /* TODO qdevified devices don't use this, remove when devices are qdevified */
774 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
776 if (bdrv_attach_dev(bs, dev) < 0) {
777 abort();
781 void bdrv_detach_dev(BlockDriverState *bs, void *dev)
782 /* TODO change to DeviceState *dev when all users are qdevified */
784 assert(bs->dev == dev);
785 bs->dev = NULL;
786 bs->dev_ops = NULL;
787 bs->dev_opaque = NULL;
788 bs->buffer_alignment = 512;
791 /* TODO change to return DeviceState * when all users are qdevified */
792 void *bdrv_get_attached_dev(BlockDriverState *bs)
794 return bs->dev;
797 void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
798 void *opaque)
800 bs->dev_ops = ops;
801 bs->dev_opaque = opaque;
802 if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
803 bs_snapshots = NULL;
807 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
809 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
810 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
814 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
816 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
819 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
821 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
822 return bs->dev_ops->is_tray_open(bs->dev_opaque);
824 return false;
827 static void bdrv_dev_resize_cb(BlockDriverState *bs)
829 if (bs->dev_ops && bs->dev_ops->resize_cb) {
830 bs->dev_ops->resize_cb(bs->dev_opaque);
834 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
836 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
837 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
839 return false;
843 * Run consistency checks on an image
845 * Returns 0 if the check could be completed (it doesn't mean that the image is
846 * free of errors) or -errno when an internal error occurred. The results of the
847 * check are stored in res.
849 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
851 if (bs->drv->bdrv_check == NULL) {
852 return -ENOTSUP;
855 memset(res, 0, sizeof(*res));
856 return bs->drv->bdrv_check(bs, res);
859 #define COMMIT_BUF_SECTORS 2048
861 /* commit COW file into the raw image */
862 int bdrv_commit(BlockDriverState *bs)
864 BlockDriver *drv = bs->drv;
865 BlockDriver *backing_drv;
866 int64_t sector, total_sectors;
867 int n, ro, open_flags;
868 int ret = 0, rw_ret = 0;
869 uint8_t *buf;
870 char filename[1024];
871 BlockDriverState *bs_rw, *bs_ro;
873 if (!drv)
874 return -ENOMEDIUM;
876 if (!bs->backing_hd) {
877 return -ENOTSUP;
880 if (bs->backing_hd->keep_read_only) {
881 return -EACCES;
884 backing_drv = bs->backing_hd->drv;
885 ro = bs->backing_hd->read_only;
886 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
887 open_flags = bs->backing_hd->open_flags;
889 if (ro) {
890 /* re-open as RW */
891 bdrv_delete(bs->backing_hd);
892 bs->backing_hd = NULL;
893 bs_rw = bdrv_new("");
894 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
895 backing_drv);
896 if (rw_ret < 0) {
897 bdrv_delete(bs_rw);
898 /* try to re-open read-only */
899 bs_ro = bdrv_new("");
900 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
901 backing_drv);
902 if (ret < 0) {
903 bdrv_delete(bs_ro);
904 /* drive not functional anymore */
905 bs->drv = NULL;
906 return ret;
908 bs->backing_hd = bs_ro;
909 return rw_ret;
911 bs->backing_hd = bs_rw;
914 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
915 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
917 for (sector = 0; sector < total_sectors; sector += n) {
918 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
920 if (bdrv_read(bs, sector, buf, n) != 0) {
921 ret = -EIO;
922 goto ro_cleanup;
925 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
926 ret = -EIO;
927 goto ro_cleanup;
932 if (drv->bdrv_make_empty) {
933 ret = drv->bdrv_make_empty(bs);
934 bdrv_flush(bs);
938 * Make sure all data we wrote to the backing device is actually
939 * stable on disk.
941 if (bs->backing_hd)
942 bdrv_flush(bs->backing_hd);
944 ro_cleanup:
945 g_free(buf);
947 if (ro) {
948 /* re-open as RO */
949 bdrv_delete(bs->backing_hd);
950 bs->backing_hd = NULL;
951 bs_ro = bdrv_new("");
952 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
953 backing_drv);
954 if (ret < 0) {
955 bdrv_delete(bs_ro);
956 /* drive not functional anymore */
957 bs->drv = NULL;
958 return ret;
960 bs->backing_hd = bs_ro;
961 bs->backing_hd->keep_read_only = 0;
964 return ret;
967 void bdrv_commit_all(void)
969 BlockDriverState *bs;
971 QTAILQ_FOREACH(bs, &bdrv_states, list) {
972 bdrv_commit(bs);
977 * Return values:
978 * 0 - success
979 * -EINVAL - backing format specified, but no file
980 * -ENOSPC - can't update the backing file because no space is left in the
981 * image file header
982 * -ENOTSUP - format driver doesn't support changing the backing file
984 int bdrv_change_backing_file(BlockDriverState *bs,
985 const char *backing_file, const char *backing_fmt)
987 BlockDriver *drv = bs->drv;
989 if (drv->bdrv_change_backing_file != NULL) {
990 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
991 } else {
992 return -ENOTSUP;
996 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
997 size_t size)
999 int64_t len;
1001 if (!bdrv_is_inserted(bs))
1002 return -ENOMEDIUM;
1004 if (bs->growable)
1005 return 0;
1007 len = bdrv_getlength(bs);
1009 if (offset < 0)
1010 return -EIO;
1012 if ((offset > len) || (len - offset < size))
1013 return -EIO;
1015 return 0;
1018 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1019 int nb_sectors)
1021 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1022 nb_sectors * BDRV_SECTOR_SIZE);
1025 typedef struct RwCo {
1026 BlockDriverState *bs;
1027 int64_t sector_num;
1028 int nb_sectors;
1029 QEMUIOVector *qiov;
1030 bool is_write;
1031 int ret;
1032 } RwCo;
1034 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1036 RwCo *rwco = opaque;
1038 if (!rwco->is_write) {
1039 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1040 rwco->nb_sectors, rwco->qiov);
1041 } else {
1042 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1043 rwco->nb_sectors, rwco->qiov);
1048 * Process a synchronous request using coroutines
1050 static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
1051 int nb_sectors, bool is_write)
1053 QEMUIOVector qiov;
1054 struct iovec iov = {
1055 .iov_base = (void *)buf,
1056 .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
1058 Coroutine *co;
1059 RwCo rwco = {
1060 .bs = bs,
1061 .sector_num = sector_num,
1062 .nb_sectors = nb_sectors,
1063 .qiov = &qiov,
1064 .is_write = is_write,
1065 .ret = NOT_DONE,
1068 qemu_iovec_init_external(&qiov, &iov, 1);
1070 if (qemu_in_coroutine()) {
1071 /* Fast-path if already in coroutine context */
1072 bdrv_rw_co_entry(&rwco);
1073 } else {
1074 co = qemu_coroutine_create(bdrv_rw_co_entry);
1075 qemu_coroutine_enter(co, &rwco);
1076 while (rwco.ret == NOT_DONE) {
1077 qemu_aio_wait();
1080 return rwco.ret;
1083 /* return < 0 if error. See bdrv_write() for the return codes */
1084 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
1085 uint8_t *buf, int nb_sectors)
1087 return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
1090 static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
1091 int nb_sectors, int dirty)
1093 int64_t start, end;
1094 unsigned long val, idx, bit;
1096 start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
1097 end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
1099 for (; start <= end; start++) {
1100 idx = start / (sizeof(unsigned long) * 8);
1101 bit = start % (sizeof(unsigned long) * 8);
1102 val = bs->dirty_bitmap[idx];
1103 if (dirty) {
1104 if (!(val & (1UL << bit))) {
1105 bs->dirty_count++;
1106 val |= 1UL << bit;
1108 } else {
1109 if (val & (1UL << bit)) {
1110 bs->dirty_count--;
1111 val &= ~(1UL << bit);
1114 bs->dirty_bitmap[idx] = val;
1118 /* Return < 0 if error. Important errors are:
1119 -EIO generic I/O error (may happen for all errors)
1120 -ENOMEDIUM No media inserted.
1121 -EINVAL Invalid sector number or nb_sectors
1122 -EACCES Trying to write a read-only device
1124 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
1125 const uint8_t *buf, int nb_sectors)
1127 return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
1130 int bdrv_pread(BlockDriverState *bs, int64_t offset,
1131 void *buf, int count1)
1133 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1134 int len, nb_sectors, count;
1135 int64_t sector_num;
1136 int ret;
1138 count = count1;
1139 /* first read to align to sector start */
1140 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1141 if (len > count)
1142 len = count;
1143 sector_num = offset >> BDRV_SECTOR_BITS;
1144 if (len > 0) {
1145 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1146 return ret;
1147 memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
1148 count -= len;
1149 if (count == 0)
1150 return count1;
1151 sector_num++;
1152 buf += len;
1155 /* read the sectors "in place" */
1156 nb_sectors = count >> BDRV_SECTOR_BITS;
1157 if (nb_sectors > 0) {
1158 if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
1159 return ret;
1160 sector_num += nb_sectors;
1161 len = nb_sectors << BDRV_SECTOR_BITS;
1162 buf += len;
1163 count -= len;
1166 /* add data from the last sector */
1167 if (count > 0) {
1168 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1169 return ret;
1170 memcpy(buf, tmp_buf, count);
1172 return count1;
1175 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
1176 const void *buf, int count1)
1178 uint8_t tmp_buf[BDRV_SECTOR_SIZE];
1179 int len, nb_sectors, count;
1180 int64_t sector_num;
1181 int ret;
1183 count = count1;
1184 /* first write to align to sector start */
1185 len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
1186 if (len > count)
1187 len = count;
1188 sector_num = offset >> BDRV_SECTOR_BITS;
1189 if (len > 0) {
1190 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1191 return ret;
1192 memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
1193 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1194 return ret;
1195 count -= len;
1196 if (count == 0)
1197 return count1;
1198 sector_num++;
1199 buf += len;
1202 /* write the sectors "in place" */
1203 nb_sectors = count >> BDRV_SECTOR_BITS;
1204 if (nb_sectors > 0) {
1205 if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
1206 return ret;
1207 sector_num += nb_sectors;
1208 len = nb_sectors << BDRV_SECTOR_BITS;
1209 buf += len;
1210 count -= len;
1213 /* add data from the last sector */
1214 if (count > 0) {
1215 if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
1216 return ret;
1217 memcpy(tmp_buf, buf, count);
1218 if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
1219 return ret;
1221 return count1;
1225 * Writes to the file and ensures that no writes are reordered across this
1226 * request (acts as a barrier)
1228 * Returns 0 on success, -errno in error cases.
1230 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1231 const void *buf, int count)
1233 int ret;
1235 ret = bdrv_pwrite(bs, offset, buf, count);
1236 if (ret < 0) {
1237 return ret;
1240 /* No flush needed for cache modes that use O_DSYNC */
1241 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1242 bdrv_flush(bs);
1245 return 0;
1249 * Handle a read request in coroutine context
1251 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1252 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1254 BlockDriver *drv = bs->drv;
1256 if (!drv) {
1257 return -ENOMEDIUM;
1259 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1260 return -EIO;
1263 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1266 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1267 int nb_sectors, QEMUIOVector *qiov)
1269 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1271 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1275 * Handle a write request in coroutine context
1277 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1278 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1280 BlockDriver *drv = bs->drv;
1281 int ret;
1283 if (!bs->drv) {
1284 return -ENOMEDIUM;
1286 if (bs->read_only) {
1287 return -EACCES;
1289 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1290 return -EIO;
1293 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1295 if (bs->dirty_bitmap) {
1296 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1299 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1300 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1303 return ret;
1306 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1307 int nb_sectors, QEMUIOVector *qiov)
1309 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1311 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1315 * Truncate file to 'offset' bytes (needed only for file protocols)
1317 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1319 BlockDriver *drv = bs->drv;
1320 int ret;
1321 if (!drv)
1322 return -ENOMEDIUM;
1323 if (!drv->bdrv_truncate)
1324 return -ENOTSUP;
1325 if (bs->read_only)
1326 return -EACCES;
1327 if (bdrv_in_use(bs))
1328 return -EBUSY;
1329 ret = drv->bdrv_truncate(bs, offset);
1330 if (ret == 0) {
1331 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1332 bdrv_dev_resize_cb(bs);
1334 return ret;
1338 * Length of a allocated file in bytes. Sparse files are counted by actual
1339 * allocated space. Return < 0 if error or unknown.
1341 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1343 BlockDriver *drv = bs->drv;
1344 if (!drv) {
1345 return -ENOMEDIUM;
1347 if (drv->bdrv_get_allocated_file_size) {
1348 return drv->bdrv_get_allocated_file_size(bs);
1350 if (bs->file) {
1351 return bdrv_get_allocated_file_size(bs->file);
1353 return -ENOTSUP;
1357 * Length of a file in bytes. Return < 0 if error or unknown.
1359 int64_t bdrv_getlength(BlockDriverState *bs)
1361 BlockDriver *drv = bs->drv;
1362 if (!drv)
1363 return -ENOMEDIUM;
1365 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1366 if (drv->bdrv_getlength) {
1367 return drv->bdrv_getlength(bs);
1370 return bs->total_sectors * BDRV_SECTOR_SIZE;
1373 /* return 0 as number of sectors if no device present or error */
1374 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1376 int64_t length;
1377 length = bdrv_getlength(bs);
1378 if (length < 0)
1379 length = 0;
1380 else
1381 length = length >> BDRV_SECTOR_BITS;
1382 *nb_sectors_ptr = length;
1385 struct partition {
1386 uint8_t boot_ind; /* 0x80 - active */
1387 uint8_t head; /* starting head */
1388 uint8_t sector; /* starting sector */
1389 uint8_t cyl; /* starting cylinder */
1390 uint8_t sys_ind; /* What partition type */
1391 uint8_t end_head; /* end head */
1392 uint8_t end_sector; /* end sector */
1393 uint8_t end_cyl; /* end cylinder */
1394 uint32_t start_sect; /* starting sector counting from 0 */
1395 uint32_t nr_sects; /* nr of sectors in partition */
1396 } QEMU_PACKED;
1398 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1399 static int guess_disk_lchs(BlockDriverState *bs,
1400 int *pcylinders, int *pheads, int *psectors)
1402 uint8_t buf[BDRV_SECTOR_SIZE];
1403 int ret, i, heads, sectors, cylinders;
1404 struct partition *p;
1405 uint32_t nr_sects;
1406 uint64_t nb_sectors;
1408 bdrv_get_geometry(bs, &nb_sectors);
1410 ret = bdrv_read(bs, 0, buf, 1);
1411 if (ret < 0)
1412 return -1;
1413 /* test msdos magic */
1414 if (buf[510] != 0x55 || buf[511] != 0xaa)
1415 return -1;
1416 for(i = 0; i < 4; i++) {
1417 p = ((struct partition *)(buf + 0x1be)) + i;
1418 nr_sects = le32_to_cpu(p->nr_sects);
1419 if (nr_sects && p->end_head) {
1420 /* We make the assumption that the partition terminates on
1421 a cylinder boundary */
1422 heads = p->end_head + 1;
1423 sectors = p->end_sector & 63;
1424 if (sectors == 0)
1425 continue;
1426 cylinders = nb_sectors / (heads * sectors);
1427 if (cylinders < 1 || cylinders > 16383)
1428 continue;
1429 *pheads = heads;
1430 *psectors = sectors;
1431 *pcylinders = cylinders;
1432 #if 0
1433 printf("guessed geometry: LCHS=%d %d %d\n",
1434 cylinders, heads, sectors);
1435 #endif
1436 return 0;
1439 return -1;
1442 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1444 int translation, lba_detected = 0;
1445 int cylinders, heads, secs;
1446 uint64_t nb_sectors;
1448 /* if a geometry hint is available, use it */
1449 bdrv_get_geometry(bs, &nb_sectors);
1450 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1451 translation = bdrv_get_translation_hint(bs);
1452 if (cylinders != 0) {
1453 *pcyls = cylinders;
1454 *pheads = heads;
1455 *psecs = secs;
1456 } else {
1457 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1458 if (heads > 16) {
1459 /* if heads > 16, it means that a BIOS LBA
1460 translation was active, so the default
1461 hardware geometry is OK */
1462 lba_detected = 1;
1463 goto default_geometry;
1464 } else {
1465 *pcyls = cylinders;
1466 *pheads = heads;
1467 *psecs = secs;
1468 /* disable any translation to be in sync with
1469 the logical geometry */
1470 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1471 bdrv_set_translation_hint(bs,
1472 BIOS_ATA_TRANSLATION_NONE);
1475 } else {
1476 default_geometry:
1477 /* if no geometry, use a standard physical disk geometry */
1478 cylinders = nb_sectors / (16 * 63);
1480 if (cylinders > 16383)
1481 cylinders = 16383;
1482 else if (cylinders < 2)
1483 cylinders = 2;
1484 *pcyls = cylinders;
1485 *pheads = 16;
1486 *psecs = 63;
1487 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1488 if ((*pcyls * *pheads) <= 131072) {
1489 bdrv_set_translation_hint(bs,
1490 BIOS_ATA_TRANSLATION_LARGE);
1491 } else {
1492 bdrv_set_translation_hint(bs,
1493 BIOS_ATA_TRANSLATION_LBA);
1497 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1501 void bdrv_set_geometry_hint(BlockDriverState *bs,
1502 int cyls, int heads, int secs)
1504 bs->cyls = cyls;
1505 bs->heads = heads;
1506 bs->secs = secs;
1509 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1511 bs->translation = translation;
1514 void bdrv_get_geometry_hint(BlockDriverState *bs,
1515 int *pcyls, int *pheads, int *psecs)
1517 *pcyls = bs->cyls;
1518 *pheads = bs->heads;
1519 *psecs = bs->secs;
1522 /* Recognize floppy formats */
1523 typedef struct FDFormat {
1524 FDriveType drive;
1525 uint8_t last_sect;
1526 uint8_t max_track;
1527 uint8_t max_head;
1528 } FDFormat;
1530 static const FDFormat fd_formats[] = {
1531 /* First entry is default format */
1532 /* 1.44 MB 3"1/2 floppy disks */
1533 { FDRIVE_DRV_144, 18, 80, 1, },
1534 { FDRIVE_DRV_144, 20, 80, 1, },
1535 { FDRIVE_DRV_144, 21, 80, 1, },
1536 { FDRIVE_DRV_144, 21, 82, 1, },
1537 { FDRIVE_DRV_144, 21, 83, 1, },
1538 { FDRIVE_DRV_144, 22, 80, 1, },
1539 { FDRIVE_DRV_144, 23, 80, 1, },
1540 { FDRIVE_DRV_144, 24, 80, 1, },
1541 /* 2.88 MB 3"1/2 floppy disks */
1542 { FDRIVE_DRV_288, 36, 80, 1, },
1543 { FDRIVE_DRV_288, 39, 80, 1, },
1544 { FDRIVE_DRV_288, 40, 80, 1, },
1545 { FDRIVE_DRV_288, 44, 80, 1, },
1546 { FDRIVE_DRV_288, 48, 80, 1, },
1547 /* 720 kB 3"1/2 floppy disks */
1548 { FDRIVE_DRV_144, 9, 80, 1, },
1549 { FDRIVE_DRV_144, 10, 80, 1, },
1550 { FDRIVE_DRV_144, 10, 82, 1, },
1551 { FDRIVE_DRV_144, 10, 83, 1, },
1552 { FDRIVE_DRV_144, 13, 80, 1, },
1553 { FDRIVE_DRV_144, 14, 80, 1, },
1554 /* 1.2 MB 5"1/4 floppy disks */
1555 { FDRIVE_DRV_120, 15, 80, 1, },
1556 { FDRIVE_DRV_120, 18, 80, 1, },
1557 { FDRIVE_DRV_120, 18, 82, 1, },
1558 { FDRIVE_DRV_120, 18, 83, 1, },
1559 { FDRIVE_DRV_120, 20, 80, 1, },
1560 /* 720 kB 5"1/4 floppy disks */
1561 { FDRIVE_DRV_120, 9, 80, 1, },
1562 { FDRIVE_DRV_120, 11, 80, 1, },
1563 /* 360 kB 5"1/4 floppy disks */
1564 { FDRIVE_DRV_120, 9, 40, 1, },
1565 { FDRIVE_DRV_120, 9, 40, 0, },
1566 { FDRIVE_DRV_120, 10, 41, 1, },
1567 { FDRIVE_DRV_120, 10, 42, 1, },
1568 /* 320 kB 5"1/4 floppy disks */
1569 { FDRIVE_DRV_120, 8, 40, 1, },
1570 { FDRIVE_DRV_120, 8, 40, 0, },
1571 /* 360 kB must match 5"1/4 better than 3"1/2... */
1572 { FDRIVE_DRV_144, 9, 80, 0, },
1573 /* end */
1574 { FDRIVE_DRV_NONE, -1, -1, 0, },
1577 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1578 int *max_track, int *last_sect,
1579 FDriveType drive_in, FDriveType *drive)
1581 const FDFormat *parse;
1582 uint64_t nb_sectors, size;
1583 int i, first_match, match;
1585 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1586 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1587 /* User defined disk */
1588 } else {
1589 bdrv_get_geometry(bs, &nb_sectors);
1590 match = -1;
1591 first_match = -1;
1592 for (i = 0; ; i++) {
1593 parse = &fd_formats[i];
1594 if (parse->drive == FDRIVE_DRV_NONE) {
1595 break;
1597 if (drive_in == parse->drive ||
1598 drive_in == FDRIVE_DRV_NONE) {
1599 size = (parse->max_head + 1) * parse->max_track *
1600 parse->last_sect;
1601 if (nb_sectors == size) {
1602 match = i;
1603 break;
1605 if (first_match == -1) {
1606 first_match = i;
1610 if (match == -1) {
1611 if (first_match == -1) {
1612 match = 1;
1613 } else {
1614 match = first_match;
1616 parse = &fd_formats[match];
1618 *nb_heads = parse->max_head + 1;
1619 *max_track = parse->max_track;
1620 *last_sect = parse->last_sect;
1621 *drive = parse->drive;
1625 int bdrv_get_translation_hint(BlockDriverState *bs)
1627 return bs->translation;
1630 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1631 BlockErrorAction on_write_error)
1633 bs->on_read_error = on_read_error;
1634 bs->on_write_error = on_write_error;
1637 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1639 return is_read ? bs->on_read_error : bs->on_write_error;
1642 int bdrv_is_read_only(BlockDriverState *bs)
1644 return bs->read_only;
1647 int bdrv_is_sg(BlockDriverState *bs)
1649 return bs->sg;
1652 int bdrv_enable_write_cache(BlockDriverState *bs)
1654 return bs->enable_write_cache;
1657 int bdrv_is_encrypted(BlockDriverState *bs)
1659 if (bs->backing_hd && bs->backing_hd->encrypted)
1660 return 1;
1661 return bs->encrypted;
1664 int bdrv_key_required(BlockDriverState *bs)
1666 BlockDriverState *backing_hd = bs->backing_hd;
1668 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1669 return 1;
1670 return (bs->encrypted && !bs->valid_key);
1673 int bdrv_set_key(BlockDriverState *bs, const char *key)
1675 int ret;
1676 if (bs->backing_hd && bs->backing_hd->encrypted) {
1677 ret = bdrv_set_key(bs->backing_hd, key);
1678 if (ret < 0)
1679 return ret;
1680 if (!bs->encrypted)
1681 return 0;
1683 if (!bs->encrypted) {
1684 return -EINVAL;
1685 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1686 return -ENOMEDIUM;
1688 ret = bs->drv->bdrv_set_key(bs, key);
1689 if (ret < 0) {
1690 bs->valid_key = 0;
1691 } else if (!bs->valid_key) {
1692 bs->valid_key = 1;
1693 /* call the change callback now, we skipped it on open */
1694 bdrv_dev_change_media_cb(bs, true);
1696 return ret;
1699 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1701 if (!bs->drv) {
1702 buf[0] = '\0';
1703 } else {
1704 pstrcpy(buf, buf_size, bs->drv->format_name);
1708 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1709 void *opaque)
1711 BlockDriver *drv;
1713 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1714 it(opaque, drv->format_name);
1718 BlockDriverState *bdrv_find(const char *name)
1720 BlockDriverState *bs;
1722 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1723 if (!strcmp(name, bs->device_name)) {
1724 return bs;
1727 return NULL;
1730 BlockDriverState *bdrv_next(BlockDriverState *bs)
1732 if (!bs) {
1733 return QTAILQ_FIRST(&bdrv_states);
1735 return QTAILQ_NEXT(bs, list);
1738 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1740 BlockDriverState *bs;
1742 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1743 it(opaque, bs);
1747 const char *bdrv_get_device_name(BlockDriverState *bs)
1749 return bs->device_name;
1752 void bdrv_flush_all(void)
1754 BlockDriverState *bs;
1756 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1757 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1758 bdrv_flush(bs);
1763 int bdrv_has_zero_init(BlockDriverState *bs)
1765 assert(bs->drv);
1767 if (bs->drv->bdrv_has_zero_init) {
1768 return bs->drv->bdrv_has_zero_init(bs);
1771 return 1;
1775 * Returns true iff the specified sector is present in the disk image. Drivers
1776 * not implementing the functionality are assumed to not support backing files,
1777 * hence all their sectors are reported as allocated.
1779 * 'pnum' is set to the number of sectors (including and immediately following
1780 * the specified sector) that are known to be in the same
1781 * allocated/unallocated state.
1783 * 'nb_sectors' is the max value 'pnum' should be set to.
1785 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1786 int *pnum)
1788 int64_t n;
1789 if (!bs->drv->bdrv_is_allocated) {
1790 if (sector_num >= bs->total_sectors) {
1791 *pnum = 0;
1792 return 0;
1794 n = bs->total_sectors - sector_num;
1795 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1796 return 1;
1798 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1801 void bdrv_mon_event(const BlockDriverState *bdrv,
1802 BlockMonEventAction action, int is_read)
1804 QObject *data;
1805 const char *action_str;
1807 switch (action) {
1808 case BDRV_ACTION_REPORT:
1809 action_str = "report";
1810 break;
1811 case BDRV_ACTION_IGNORE:
1812 action_str = "ignore";
1813 break;
1814 case BDRV_ACTION_STOP:
1815 action_str = "stop";
1816 break;
1817 default:
1818 abort();
1821 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1822 bdrv->device_name,
1823 action_str,
1824 is_read ? "read" : "write");
1825 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1827 qobject_decref(data);
1830 BlockInfoList *qmp_query_block(Error **errp)
1832 BlockInfoList *head = NULL, *cur_item = NULL;
1833 BlockDriverState *bs;
1835 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1836 BlockInfoList *info = g_malloc0(sizeof(*info));
1838 info->value = g_malloc0(sizeof(*info->value));
1839 info->value->device = g_strdup(bs->device_name);
1840 info->value->type = g_strdup("unknown");
1841 info->value->locked = bdrv_dev_is_medium_locked(bs);
1842 info->value->removable = bdrv_dev_has_removable_media(bs);
1844 if (bdrv_dev_has_removable_media(bs)) {
1845 info->value->has_tray_open = true;
1846 info->value->tray_open = bdrv_dev_is_tray_open(bs);
1849 if (bdrv_iostatus_is_enabled(bs)) {
1850 info->value->has_io_status = true;
1851 info->value->io_status = bs->iostatus;
1854 if (bs->drv) {
1855 info->value->has_inserted = true;
1856 info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
1857 info->value->inserted->file = g_strdup(bs->filename);
1858 info->value->inserted->ro = bs->read_only;
1859 info->value->inserted->drv = g_strdup(bs->drv->format_name);
1860 info->value->inserted->encrypted = bs->encrypted;
1861 if (bs->backing_file[0]) {
1862 info->value->inserted->has_backing_file = true;
1863 info->value->inserted->backing_file = g_strdup(bs->backing_file);
1867 /* XXX: waiting for the qapi to support GSList */
1868 if (!cur_item) {
1869 head = cur_item = info;
1870 } else {
1871 cur_item->next = info;
1872 cur_item = info;
1876 return head;
1879 /* Consider exposing this as a full fledged QMP command */
1880 static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
1882 BlockStats *s;
1884 s = g_malloc0(sizeof(*s));
1886 if (bs->device_name[0]) {
1887 s->has_device = true;
1888 s->device = g_strdup(bs->device_name);
1891 s->stats = g_malloc0(sizeof(*s->stats));
1892 s->stats->rd_bytes = bs->nr_bytes[BDRV_ACCT_READ];
1893 s->stats->wr_bytes = bs->nr_bytes[BDRV_ACCT_WRITE];
1894 s->stats->rd_operations = bs->nr_ops[BDRV_ACCT_READ];
1895 s->stats->wr_operations = bs->nr_ops[BDRV_ACCT_WRITE];
1896 s->stats->wr_highest_offset = bs->wr_highest_sector * BDRV_SECTOR_SIZE;
1897 s->stats->flush_operations = bs->nr_ops[BDRV_ACCT_FLUSH];
1898 s->stats->wr_total_time_ns = bs->total_time_ns[BDRV_ACCT_WRITE];
1899 s->stats->rd_total_time_ns = bs->total_time_ns[BDRV_ACCT_READ];
1900 s->stats->flush_total_time_ns = bs->total_time_ns[BDRV_ACCT_FLUSH];
1902 if (bs->file) {
1903 s->has_parent = true;
1904 s->parent = qmp_query_blockstat(bs->file, NULL);
1907 return s;
1910 BlockStatsList *qmp_query_blockstats(Error **errp)
1912 BlockStatsList *head = NULL, *cur_item = NULL;
1913 BlockDriverState *bs;
1915 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1916 BlockStatsList *info = g_malloc0(sizeof(*info));
1917 info->value = qmp_query_blockstat(bs, NULL);
1919 /* XXX: waiting for the qapi to support GSList */
1920 if (!cur_item) {
1921 head = cur_item = info;
1922 } else {
1923 cur_item->next = info;
1924 cur_item = info;
1928 return head;
1931 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1933 if (bs->backing_hd && bs->backing_hd->encrypted)
1934 return bs->backing_file;
1935 else if (bs->encrypted)
1936 return bs->filename;
1937 else
1938 return NULL;
1941 void bdrv_get_backing_filename(BlockDriverState *bs,
1942 char *filename, int filename_size)
1944 pstrcpy(filename, filename_size, bs->backing_file);
1947 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1948 const uint8_t *buf, int nb_sectors)
1950 BlockDriver *drv = bs->drv;
1951 if (!drv)
1952 return -ENOMEDIUM;
1953 if (!drv->bdrv_write_compressed)
1954 return -ENOTSUP;
1955 if (bdrv_check_request(bs, sector_num, nb_sectors))
1956 return -EIO;
1958 if (bs->dirty_bitmap) {
1959 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1962 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1965 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1967 BlockDriver *drv = bs->drv;
1968 if (!drv)
1969 return -ENOMEDIUM;
1970 if (!drv->bdrv_get_info)
1971 return -ENOTSUP;
1972 memset(bdi, 0, sizeof(*bdi));
1973 return drv->bdrv_get_info(bs, bdi);
1976 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1977 int64_t pos, int size)
1979 BlockDriver *drv = bs->drv;
1980 if (!drv)
1981 return -ENOMEDIUM;
1982 if (drv->bdrv_save_vmstate)
1983 return drv->bdrv_save_vmstate(bs, buf, pos, size);
1984 if (bs->file)
1985 return bdrv_save_vmstate(bs->file, buf, pos, size);
1986 return -ENOTSUP;
1989 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1990 int64_t pos, int size)
1992 BlockDriver *drv = bs->drv;
1993 if (!drv)
1994 return -ENOMEDIUM;
1995 if (drv->bdrv_load_vmstate)
1996 return drv->bdrv_load_vmstate(bs, buf, pos, size);
1997 if (bs->file)
1998 return bdrv_load_vmstate(bs->file, buf, pos, size);
1999 return -ENOTSUP;
2002 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2004 BlockDriver *drv = bs->drv;
2006 if (!drv || !drv->bdrv_debug_event) {
2007 return;
2010 return drv->bdrv_debug_event(bs, event);
2014 /**************************************************************/
2015 /* handling of snapshots */
2017 int bdrv_can_snapshot(BlockDriverState *bs)
2019 BlockDriver *drv = bs->drv;
2020 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2021 return 0;
2024 if (!drv->bdrv_snapshot_create) {
2025 if (bs->file != NULL) {
2026 return bdrv_can_snapshot(bs->file);
2028 return 0;
2031 return 1;
2034 int bdrv_is_snapshot(BlockDriverState *bs)
2036 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2039 BlockDriverState *bdrv_snapshots(void)
2041 BlockDriverState *bs;
2043 if (bs_snapshots) {
2044 return bs_snapshots;
2047 bs = NULL;
2048 while ((bs = bdrv_next(bs))) {
2049 if (bdrv_can_snapshot(bs)) {
2050 bs_snapshots = bs;
2051 return bs;
2054 return NULL;
2057 int bdrv_snapshot_create(BlockDriverState *bs,
2058 QEMUSnapshotInfo *sn_info)
2060 BlockDriver *drv = bs->drv;
2061 if (!drv)
2062 return -ENOMEDIUM;
2063 if (drv->bdrv_snapshot_create)
2064 return drv->bdrv_snapshot_create(bs, sn_info);
2065 if (bs->file)
2066 return bdrv_snapshot_create(bs->file, sn_info);
2067 return -ENOTSUP;
2070 int bdrv_snapshot_goto(BlockDriverState *bs,
2071 const char *snapshot_id)
2073 BlockDriver *drv = bs->drv;
2074 int ret, open_ret;
2076 if (!drv)
2077 return -ENOMEDIUM;
2078 if (drv->bdrv_snapshot_goto)
2079 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2081 if (bs->file) {
2082 drv->bdrv_close(bs);
2083 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2084 open_ret = drv->bdrv_open(bs, bs->open_flags);
2085 if (open_ret < 0) {
2086 bdrv_delete(bs->file);
2087 bs->drv = NULL;
2088 return open_ret;
2090 return ret;
2093 return -ENOTSUP;
2096 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2098 BlockDriver *drv = bs->drv;
2099 if (!drv)
2100 return -ENOMEDIUM;
2101 if (drv->bdrv_snapshot_delete)
2102 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2103 if (bs->file)
2104 return bdrv_snapshot_delete(bs->file, snapshot_id);
2105 return -ENOTSUP;
2108 int bdrv_snapshot_list(BlockDriverState *bs,
2109 QEMUSnapshotInfo **psn_info)
2111 BlockDriver *drv = bs->drv;
2112 if (!drv)
2113 return -ENOMEDIUM;
2114 if (drv->bdrv_snapshot_list)
2115 return drv->bdrv_snapshot_list(bs, psn_info);
2116 if (bs->file)
2117 return bdrv_snapshot_list(bs->file, psn_info);
2118 return -ENOTSUP;
2121 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2122 const char *snapshot_name)
2124 BlockDriver *drv = bs->drv;
2125 if (!drv) {
2126 return -ENOMEDIUM;
2128 if (!bs->read_only) {
2129 return -EINVAL;
2131 if (drv->bdrv_snapshot_load_tmp) {
2132 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2134 return -ENOTSUP;
#define NB_SUFFIXES 4

/*
 * Format 'size' (bytes) into buf as a short human readable string and
 * return buf.
 *
 * Values up to 999 are printed as plain integers. Larger values are
 * scaled by powers of 1024 with suffix K, M, G or T: one decimal while
 * the scaled value is below 10, a rounded integer otherwise. T is used
 * for everything too large for the smaller units.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (i = 0, base = 1024; i < NB_SUFFIXES; i++, base *= 1024) {
        if (size < (10 * base)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / base,
                     suffixes[i]);
            return buf;
        }
        if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
            /* round to the nearest whole unit */
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (base >> 1)) / base),
                     suffixes[i]);
            return buf;
        }
    }
    return buf;
}
2167 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2169 char buf1[128], date_buf[128], clock_buf[128];
2170 #ifdef _WIN32
2171 struct tm *ptm;
2172 #else
2173 struct tm tm;
2174 #endif
2175 time_t ti;
2176 int64_t secs;
2178 if (!sn) {
2179 snprintf(buf, buf_size,
2180 "%-10s%-20s%7s%20s%15s",
2181 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2182 } else {
2183 ti = sn->date_sec;
2184 #ifdef _WIN32
2185 ptm = localtime(&ti);
2186 strftime(date_buf, sizeof(date_buf),
2187 "%Y-%m-%d %H:%M:%S", ptm);
2188 #else
2189 localtime_r(&ti, &tm);
2190 strftime(date_buf, sizeof(date_buf),
2191 "%Y-%m-%d %H:%M:%S", &tm);
2192 #endif
2193 secs = sn->vm_clock_nsec / 1000000000;
2194 snprintf(clock_buf, sizeof(clock_buf),
2195 "%02d:%02d:%02d.%03d",
2196 (int)(secs / 3600),
2197 (int)((secs / 60) % 60),
2198 (int)(secs % 60),
2199 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2200 snprintf(buf, buf_size,
2201 "%-10s%-20s%7s%20s%15s",
2202 sn->id_str, sn->name,
2203 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2204 date_buf,
2205 clock_buf);
2207 return buf;
2210 /**************************************************************/
2211 /* async I/Os */
2213 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2214 QEMUIOVector *qiov, int nb_sectors,
2215 BlockDriverCompletionFunc *cb, void *opaque)
2217 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2219 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2220 cb, opaque, false);
2223 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2224 QEMUIOVector *qiov, int nb_sectors,
2225 BlockDriverCompletionFunc *cb, void *opaque)
2227 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2229 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2230 cb, opaque, true);
2234 typedef struct MultiwriteCB {
2235 int error;
2236 int num_requests;
2237 int num_callbacks;
2238 struct {
2239 BlockDriverCompletionFunc *cb;
2240 void *opaque;
2241 QEMUIOVector *free_qiov;
2242 void *free_buf;
2243 } callbacks[];
2244 } MultiwriteCB;
2246 static void multiwrite_user_cb(MultiwriteCB *mcb)
2248 int i;
2250 for (i = 0; i < mcb->num_callbacks; i++) {
2251 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2252 if (mcb->callbacks[i].free_qiov) {
2253 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2255 g_free(mcb->callbacks[i].free_qiov);
2256 qemu_vfree(mcb->callbacks[i].free_buf);
2260 static void multiwrite_cb(void *opaque, int ret)
2262 MultiwriteCB *mcb = opaque;
2264 trace_multiwrite_cb(mcb, ret);
2266 if (ret < 0 && !mcb->error) {
2267 mcb->error = ret;
2270 mcb->num_requests--;
2271 if (mcb->num_requests == 0) {
2272 multiwrite_user_cb(mcb);
2273 g_free(mcb);
2277 static int multiwrite_req_compare(const void *a, const void *b)
2279 const BlockRequest *req1 = a, *req2 = b;
2282 * Note that we can't simply subtract req2->sector from req1->sector
2283 * here as that could overflow the return value.
2285 if (req1->sector > req2->sector) {
2286 return 1;
2287 } else if (req1->sector < req2->sector) {
2288 return -1;
2289 } else {
2290 return 0;
2295 * Takes a bunch of requests and tries to merge them. Returns the number of
2296 * requests that remain after merging.
2298 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2299 int num_reqs, MultiwriteCB *mcb)
2301 int i, outidx;
2303 // Sort requests by start sector
2304 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2306 // Check if adjacent requests touch the same clusters. If so, combine them,
2307 // filling up gaps with zero sectors.
2308 outidx = 0;
2309 for (i = 1; i < num_reqs; i++) {
2310 int merge = 0;
2311 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2313 // This handles the cases that are valid for all block drivers, namely
2314 // exactly sequential writes and overlapping writes.
2315 if (reqs[i].sector <= oldreq_last) {
2316 merge = 1;
2319 // The block driver may decide that it makes sense to combine requests
2320 // even if there is a gap of some sectors between them. In this case,
2321 // the gap is filled with zeros (therefore only applicable for yet
2322 // unused space in format like qcow2).
2323 if (!merge && bs->drv->bdrv_merge_requests) {
2324 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2327 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2328 merge = 0;
2331 if (merge) {
2332 size_t size;
2333 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2334 qemu_iovec_init(qiov,
2335 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2337 // Add the first request to the merged one. If the requests are
2338 // overlapping, drop the last sectors of the first request.
2339 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2340 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2342 // We might need to add some zeros between the two requests
2343 if (reqs[i].sector > oldreq_last) {
2344 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2345 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2346 memset(buf, 0, zero_bytes);
2347 qemu_iovec_add(qiov, buf, zero_bytes);
2348 mcb->callbacks[i].free_buf = buf;
2351 // Add the second request
2352 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2354 reqs[outidx].nb_sectors = qiov->size >> 9;
2355 reqs[outidx].qiov = qiov;
2357 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2358 } else {
2359 outidx++;
2360 reqs[outidx].sector = reqs[i].sector;
2361 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2362 reqs[outidx].qiov = reqs[i].qiov;
2366 return outidx + 1;
2370 * Submit multiple AIO write requests at once.
2372 * On success, the function returns 0 and all requests in the reqs array have
2373 * been submitted. In error case this function returns -1, and any of the
2374 * requests may or may not be submitted yet. In particular, this means that the
2375 * callback will be called for some of the requests, for others it won't. The
2376 * caller must check the error field of the BlockRequest to wait for the right
2377 * callbacks (if error != 0, no callback will be called).
2379 * The implementation may modify the contents of the reqs array, e.g. to merge
2380 * requests. However, the fields opaque and error are left unmodified as they
2381 * are used to signal failure for a single request to the caller.
2383 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2385 BlockDriverAIOCB *acb;
2386 MultiwriteCB *mcb;
2387 int i;
2389 /* don't submit writes if we don't have a medium */
2390 if (bs->drv == NULL) {
2391 for (i = 0; i < num_reqs; i++) {
2392 reqs[i].error = -ENOMEDIUM;
2394 return -1;
2397 if (num_reqs == 0) {
2398 return 0;
2401 // Create MultiwriteCB structure
2402 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2403 mcb->num_requests = 0;
2404 mcb->num_callbacks = num_reqs;
2406 for (i = 0; i < num_reqs; i++) {
2407 mcb->callbacks[i].cb = reqs[i].cb;
2408 mcb->callbacks[i].opaque = reqs[i].opaque;
2411 // Check for mergable requests
2412 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2414 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2417 * Run the aio requests. As soon as one request can't be submitted
2418 * successfully, fail all requests that are not yet submitted (we must
2419 * return failure for all requests anyway)
2421 * num_requests cannot be set to the right value immediately: If
2422 * bdrv_aio_writev fails for some request, num_requests would be too high
2423 * and therefore multiwrite_cb() would never recognize the multiwrite
2424 * request as completed. We also cannot use the loop variable i to set it
2425 * when the first request fails because the callback may already have been
2426 * called for previously submitted requests. Thus, num_requests must be
2427 * incremented for each request that is submitted.
2429 * The problem that callbacks may be called early also means that we need
2430 * to take care that num_requests doesn't become 0 before all requests are
2431 * submitted - multiwrite_cb() would consider the multiwrite request
2432 * completed. A dummy request that is "completed" by a manual call to
2433 * multiwrite_cb() takes care of this.
2435 mcb->num_requests = 1;
2437 // Run the aio requests
2438 for (i = 0; i < num_reqs; i++) {
2439 mcb->num_requests++;
2440 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2441 reqs[i].nb_sectors, multiwrite_cb, mcb);
2443 if (acb == NULL) {
2444 // We can only fail the whole thing if no request has been
2445 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2446 // complete and report the error in the callback.
2447 if (i == 0) {
2448 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2449 goto fail;
2450 } else {
2451 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2452 multiwrite_cb(mcb, -EIO);
2453 break;
2458 /* Complete the dummy request */
2459 multiwrite_cb(mcb, 0);
2461 return 0;
2463 fail:
2464 for (i = 0; i < mcb->num_callbacks; i++) {
2465 reqs[i].error = -EIO;
2467 g_free(mcb);
2468 return -1;
2471 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2473 acb->pool->cancel(acb);
2477 /**************************************************************/
2478 /* async block device emulation */
2480 typedef struct BlockDriverAIOCBSync {
2481 BlockDriverAIOCB common;
2482 QEMUBH *bh;
2483 int ret;
2484 /* vector translation state */
2485 QEMUIOVector *qiov;
2486 uint8_t *bounce;
2487 int is_write;
2488 } BlockDriverAIOCBSync;
2490 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2492 BlockDriverAIOCBSync *acb =
2493 container_of(blockacb, BlockDriverAIOCBSync, common);
2494 qemu_bh_delete(acb->bh);
2495 acb->bh = NULL;
2496 qemu_aio_release(acb);
2499 static AIOPool bdrv_em_aio_pool = {
2500 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2501 .cancel = bdrv_aio_cancel_em,
2504 static void bdrv_aio_bh_cb(void *opaque)
2506 BlockDriverAIOCBSync *acb = opaque;
2508 if (!acb->is_write)
2509 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2510 qemu_vfree(acb->bounce);
2511 acb->common.cb(acb->common.opaque, acb->ret);
2512 qemu_bh_delete(acb->bh);
2513 acb->bh = NULL;
2514 qemu_aio_release(acb);
2517 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2518 int64_t sector_num,
2519 QEMUIOVector *qiov,
2520 int nb_sectors,
2521 BlockDriverCompletionFunc *cb,
2522 void *opaque,
2523 int is_write)
2526 BlockDriverAIOCBSync *acb;
2528 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2529 acb->is_write = is_write;
2530 acb->qiov = qiov;
2531 acb->bounce = qemu_blockalign(bs, qiov->size);
2533 if (!acb->bh)
2534 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2536 if (is_write) {
2537 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2538 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2539 } else {
2540 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2543 qemu_bh_schedule(acb->bh);
2545 return &acb->common;
2548 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2549 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2550 BlockDriverCompletionFunc *cb, void *opaque)
2552 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2555 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2556 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2557 BlockDriverCompletionFunc *cb, void *opaque)
2559 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
2563 typedef struct BlockDriverAIOCBCoroutine {
2564 BlockDriverAIOCB common;
2565 BlockRequest req;
2566 bool is_write;
2567 QEMUBH* bh;
2568 } BlockDriverAIOCBCoroutine;
2570 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2572 qemu_aio_flush();
2575 static AIOPool bdrv_em_co_aio_pool = {
2576 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2577 .cancel = bdrv_aio_co_cancel_em,
2580 static void bdrv_co_em_bh(void *opaque)
2582 BlockDriverAIOCBCoroutine *acb = opaque;
2584 acb->common.cb(acb->common.opaque, acb->req.error);
2585 qemu_bh_delete(acb->bh);
2586 qemu_aio_release(acb);
2589 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2590 static void coroutine_fn bdrv_co_do_rw(void *opaque)
2592 BlockDriverAIOCBCoroutine *acb = opaque;
2593 BlockDriverState *bs = acb->common.bs;
2595 if (!acb->is_write) {
2596 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2597 acb->req.nb_sectors, acb->req.qiov);
2598 } else {
2599 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2600 acb->req.nb_sectors, acb->req.qiov);
2603 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2604 qemu_bh_schedule(acb->bh);
2607 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2608 int64_t sector_num,
2609 QEMUIOVector *qiov,
2610 int nb_sectors,
2611 BlockDriverCompletionFunc *cb,
2612 void *opaque,
2613 bool is_write)
2615 Coroutine *co;
2616 BlockDriverAIOCBCoroutine *acb;
2618 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2619 acb->req.sector = sector_num;
2620 acb->req.nb_sectors = nb_sectors;
2621 acb->req.qiov = qiov;
2622 acb->is_write = is_write;
2624 co = qemu_coroutine_create(bdrv_co_do_rw);
2625 qemu_coroutine_enter(co, acb);
2627 return &acb->common;
2630 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
2632 BlockDriverAIOCBCoroutine *acb = opaque;
2633 BlockDriverState *bs = acb->common.bs;
2635 acb->req.error = bdrv_co_flush(bs);
2636 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2637 qemu_bh_schedule(acb->bh);
2640 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2641 BlockDriverCompletionFunc *cb, void *opaque)
2643 trace_bdrv_aio_flush(bs, opaque);
2645 Coroutine *co;
2646 BlockDriverAIOCBCoroutine *acb;
2648 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2649 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2650 qemu_coroutine_enter(co, acb);
2652 return &acb->common;
2655 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
2657 BlockDriverAIOCBCoroutine *acb = opaque;
2658 BlockDriverState *bs = acb->common.bs;
2660 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2661 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2662 qemu_bh_schedule(acb->bh);
2665 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2666 int64_t sector_num, int nb_sectors,
2667 BlockDriverCompletionFunc *cb, void *opaque)
2669 Coroutine *co;
2670 BlockDriverAIOCBCoroutine *acb;
2672 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2674 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2675 acb->req.sector = sector_num;
2676 acb->req.nb_sectors = nb_sectors;
2677 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2678 qemu_coroutine_enter(co, acb);
2680 return &acb->common;
2683 void bdrv_init(void)
2685 module_call_init(MODULE_INIT_BLOCK);
2688 void bdrv_init_with_whitelist(void)
2690 use_bdrv_whitelist = 1;
2691 bdrv_init();
2694 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2695 BlockDriverCompletionFunc *cb, void *opaque)
2697 BlockDriverAIOCB *acb;
2699 if (pool->free_aiocb) {
2700 acb = pool->free_aiocb;
2701 pool->free_aiocb = acb->next;
2702 } else {
2703 acb = g_malloc0(pool->aiocb_size);
2704 acb->pool = pool;
2706 acb->bs = bs;
2707 acb->cb = cb;
2708 acb->opaque = opaque;
2709 return acb;
2712 void qemu_aio_release(void *p)
2714 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2715 AIOPool *pool = acb->pool;
2716 acb->next = pool->free_aiocb;
2717 pool->free_aiocb = acb;
2720 /**************************************************************/
2721 /* Coroutine block device emulation */
2723 typedef struct CoroutineIOCompletion {
2724 Coroutine *coroutine;
2725 int ret;
2726 } CoroutineIOCompletion;
2728 static void bdrv_co_io_em_complete(void *opaque, int ret)
2730 CoroutineIOCompletion *co = opaque;
2732 co->ret = ret;
2733 qemu_coroutine_enter(co->coroutine, NULL);
2736 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2737 int nb_sectors, QEMUIOVector *iov,
2738 bool is_write)
2740 CoroutineIOCompletion co = {
2741 .coroutine = qemu_coroutine_self(),
2743 BlockDriverAIOCB *acb;
2745 if (is_write) {
2746 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2747 bdrv_co_io_em_complete, &co);
2748 } else {
2749 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2750 bdrv_co_io_em_complete, &co);
2753 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
2754 if (!acb) {
2755 return -EIO;
2757 qemu_coroutine_yield();
2759 return co.ret;
2762 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2763 int64_t sector_num, int nb_sectors,
2764 QEMUIOVector *iov)
2766 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2769 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2770 int64_t sector_num, int nb_sectors,
2771 QEMUIOVector *iov)
2773 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2776 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2778 RwCo *rwco = opaque;
2780 rwco->ret = bdrv_co_flush(rwco->bs);
2783 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2785 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2786 return 0;
2787 } else if (!bs->drv) {
2788 return 0;
2789 } else if (bs->drv->bdrv_co_flush) {
2790 return bs->drv->bdrv_co_flush(bs);
2791 } else if (bs->drv->bdrv_aio_flush) {
2792 BlockDriverAIOCB *acb;
2793 CoroutineIOCompletion co = {
2794 .coroutine = qemu_coroutine_self(),
2797 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2798 if (acb == NULL) {
2799 return -EIO;
2800 } else {
2801 qemu_coroutine_yield();
2802 return co.ret;
2804 } else {
2806 * Some block drivers always operate in either writethrough or unsafe
2807 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
2808 * know how the server works (because the behaviour is hardcoded or
2809 * depends on server-side configuration), so we can't ensure that
2810 * everything is safe on disk. Returning an error doesn't work because
2811 * that would break guests even if the server operates in writethrough
2812 * mode.
2814 * Let's hope the user knows what he's doing.
2816 return 0;
2820 int bdrv_flush(BlockDriverState *bs)
2822 Coroutine *co;
2823 RwCo rwco = {
2824 .bs = bs,
2825 .ret = NOT_DONE,
2828 if (qemu_in_coroutine()) {
2829 /* Fast-path if already in coroutine context */
2830 bdrv_flush_co_entry(&rwco);
2831 } else {
2832 co = qemu_coroutine_create(bdrv_flush_co_entry);
2833 qemu_coroutine_enter(co, &rwco);
2834 while (rwco.ret == NOT_DONE) {
2835 qemu_aio_wait();
2839 return rwco.ret;
2842 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
2844 RwCo *rwco = opaque;
2846 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
2849 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
2850 int nb_sectors)
2852 if (!bs->drv) {
2853 return -ENOMEDIUM;
2854 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2855 return -EIO;
2856 } else if (bs->read_only) {
2857 return -EROFS;
2858 } else if (bs->drv->bdrv_co_discard) {
2859 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
2860 } else if (bs->drv->bdrv_aio_discard) {
2861 BlockDriverAIOCB *acb;
2862 CoroutineIOCompletion co = {
2863 .coroutine = qemu_coroutine_self(),
2866 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
2867 bdrv_co_io_em_complete, &co);
2868 if (acb == NULL) {
2869 return -EIO;
2870 } else {
2871 qemu_coroutine_yield();
2872 return co.ret;
2874 } else {
2875 return 0;
2879 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
2881 Coroutine *co;
2882 RwCo rwco = {
2883 .bs = bs,
2884 .sector_num = sector_num,
2885 .nb_sectors = nb_sectors,
2886 .ret = NOT_DONE,
2889 if (qemu_in_coroutine()) {
2890 /* Fast-path if already in coroutine context */
2891 bdrv_discard_co_entry(&rwco);
2892 } else {
2893 co = qemu_coroutine_create(bdrv_discard_co_entry);
2894 qemu_coroutine_enter(co, &rwco);
2895 while (rwco.ret == NOT_DONE) {
2896 qemu_aio_wait();
2900 return rwco.ret;
2903 /**************************************************************/
2904 /* removable device support */
2907 * Return TRUE if the media is present
2909 int bdrv_is_inserted(BlockDriverState *bs)
2911 BlockDriver *drv = bs->drv;
2913 if (!drv)
2914 return 0;
2915 if (!drv->bdrv_is_inserted)
2916 return 1;
2917 return drv->bdrv_is_inserted(bs);
2921 * Return whether the media changed since the last call to this
2922 * function, or -ENOTSUP if we don't know. Most drivers don't know.
2924 int bdrv_media_changed(BlockDriverState *bs)
2926 BlockDriver *drv = bs->drv;
2928 if (drv && drv->bdrv_media_changed) {
2929 return drv->bdrv_media_changed(bs);
2931 return -ENOTSUP;
2935 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
2937 void bdrv_eject(BlockDriverState *bs, int eject_flag)
2939 BlockDriver *drv = bs->drv;
2941 if (drv && drv->bdrv_eject) {
2942 drv->bdrv_eject(bs, eject_flag);
2947 * Lock or unlock the media (if it is locked, the user won't be able
2948 * to eject it manually).
2950 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
2952 BlockDriver *drv = bs->drv;
2954 trace_bdrv_lock_medium(bs, locked);
2956 if (drv && drv->bdrv_lock_medium) {
2957 drv->bdrv_lock_medium(bs, locked);
2961 /* needed for generic scsi interface */
2963 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
2965 BlockDriver *drv = bs->drv;
2967 if (drv && drv->bdrv_ioctl)
2968 return drv->bdrv_ioctl(bs, req, buf);
2969 return -ENOTSUP;
2972 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
2973 unsigned long int req, void *buf,
2974 BlockDriverCompletionFunc *cb, void *opaque)
2976 BlockDriver *drv = bs->drv;
2978 if (drv && drv->bdrv_aio_ioctl)
2979 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
2980 return NULL;
2983 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
2985 bs->buffer_alignment = align;
2988 void *qemu_blockalign(BlockDriverState *bs, size_t size)
2990 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
2993 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
2995 int64_t bitmap_size;
2997 bs->dirty_count = 0;
2998 if (enable) {
2999 if (!bs->dirty_bitmap) {
3000 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3001 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3002 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3004 bs->dirty_bitmap = g_malloc0(bitmap_size);
3006 } else {
3007 if (bs->dirty_bitmap) {
3008 g_free(bs->dirty_bitmap);
3009 bs->dirty_bitmap = NULL;
3014 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3016 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3018 if (bs->dirty_bitmap &&
3019 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3020 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3021 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3022 } else {
3023 return 0;
3027 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3028 int nr_sectors)
3030 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3033 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3035 return bs->dirty_count;
3038 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3040 assert(bs->in_use != in_use);
3041 bs->in_use = in_use;
3044 int bdrv_in_use(BlockDriverState *bs)
3046 return bs->in_use;
3049 void bdrv_iostatus_enable(BlockDriverState *bs)
3051 bs->iostatus_enabled = true;
3052 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3055 /* The I/O status is only enabled if the drive explicitly
3056 * enables it _and_ the VM is configured to stop on errors */
3057 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3059 return (bs->iostatus_enabled &&
3060 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3061 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3062 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3065 void bdrv_iostatus_disable(BlockDriverState *bs)
3067 bs->iostatus_enabled = false;
3070 void bdrv_iostatus_reset(BlockDriverState *bs)
3072 if (bdrv_iostatus_is_enabled(bs)) {
3073 bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
3077 /* XXX: Today this is set by device models because it makes the implementation
3078 quite simple. However, the block layer knows about the error, so it's
3079 possible to implement this without device models being involved */
3080 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3082 if (bdrv_iostatus_is_enabled(bs) &&
3083 bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
3084 assert(error >= 0);
3085 bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
3086 BLOCK_DEVICE_IO_STATUS_FAILED;
3090 void
3091 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3092 enum BlockAcctType type)
3094 assert(type < BDRV_MAX_IOTYPE);
3096 cookie->bytes = bytes;
3097 cookie->start_time_ns = get_clock();
3098 cookie->type = type;
3101 void
3102 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3104 assert(cookie->type < BDRV_MAX_IOTYPE);
3106 bs->nr_bytes[cookie->type] += cookie->bytes;
3107 bs->nr_ops[cookie->type]++;
3108 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3111 int bdrv_img_create(const char *filename, const char *fmt,
3112 const char *base_filename, const char *base_fmt,
3113 char *options, uint64_t img_size, int flags)
3115 QEMUOptionParameter *param = NULL, *create_options = NULL;
3116 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3117 BlockDriverState *bs = NULL;
3118 BlockDriver *drv, *proto_drv;
3119 BlockDriver *backing_drv = NULL;
3120 int ret = 0;
3122 /* Find driver and parse its options */
3123 drv = bdrv_find_format(fmt);
3124 if (!drv) {
3125 error_report("Unknown file format '%s'", fmt);
3126 ret = -EINVAL;
3127 goto out;
3130 proto_drv = bdrv_find_protocol(filename);
3131 if (!proto_drv) {
3132 error_report("Unknown protocol '%s'", filename);
3133 ret = -EINVAL;
3134 goto out;
3137 create_options = append_option_parameters(create_options,
3138 drv->create_options);
3139 create_options = append_option_parameters(create_options,
3140 proto_drv->create_options);
3142 /* Create parameter list with default values */
3143 param = parse_option_parameters("", create_options, param);
3145 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3147 /* Parse -o options */
3148 if (options) {
3149 param = parse_option_parameters(options, create_options, param);
3150 if (param == NULL) {
3151 error_report("Invalid options for file format '%s'.", fmt);
3152 ret = -EINVAL;
3153 goto out;
3157 if (base_filename) {
3158 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3159 base_filename)) {
3160 error_report("Backing file not supported for file format '%s'",
3161 fmt);
3162 ret = -EINVAL;
3163 goto out;
3167 if (base_fmt) {
3168 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3169 error_report("Backing file format not supported for file "
3170 "format '%s'", fmt);
3171 ret = -EINVAL;
3172 goto out;
3176 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3177 if (backing_file && backing_file->value.s) {
3178 if (!strcmp(filename, backing_file->value.s)) {
3179 error_report("Error: Trying to create an image with the "
3180 "same filename as the backing file");
3181 ret = -EINVAL;
3182 goto out;
3186 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3187 if (backing_fmt && backing_fmt->value.s) {
3188 backing_drv = bdrv_find_format(backing_fmt->value.s);
3189 if (!backing_drv) {
3190 error_report("Unknown backing file format '%s'",
3191 backing_fmt->value.s);
3192 ret = -EINVAL;
3193 goto out;
3197 // The size for the image must always be specified, with one exception:
3198 // If we are using a backing file, we can obtain the size from there
3199 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3200 if (size && size->value.n == -1) {
3201 if (backing_file && backing_file->value.s) {
3202 uint64_t size;
3203 char buf[32];
3205 bs = bdrv_new("");
3207 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3208 if (ret < 0) {
3209 error_report("Could not open '%s'", backing_file->value.s);
3210 goto out;
3212 bdrv_get_geometry(bs, &size);
3213 size *= 512;
3215 snprintf(buf, sizeof(buf), "%" PRId64, size);
3216 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3217 } else {
3218 error_report("Image creation needs a size parameter");
3219 ret = -EINVAL;
3220 goto out;
3224 printf("Formatting '%s', fmt=%s ", filename, fmt);
3225 print_option_parameters(param);
3226 puts("");
3228 ret = bdrv_create(drv, filename, param);
3230 if (ret < 0) {
3231 if (ret == -ENOTSUP) {
3232 error_report("Formatting or formatting option not supported for "
3233 "file format '%s'", fmt);
3234 } else if (ret == -EFBIG) {
3235 error_report("The image size is too large for file format '%s'",
3236 fmt);
3237 } else {
3238 error_report("%s: error while creating %s: %s", filename, fmt,
3239 strerror(-ret));
3243 out:
3244 free_option_parameters(create_options);
3245 free_option_parameters(param);
3247 if (bs) {
3248 bdrv_delete(bs);
3251 return ret;