scsi-generic: look at host status
[qemu.git] / block.c
blob96f3c3fe4d2450331f4c61d8c6b658f2760ac082
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "trace.h"
27 #include "monitor.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "qemu-objects.h"
31 #include "qemu-coroutine.h"
33 #ifdef CONFIG_BSD
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/queue.h>
38 #ifndef __DragonFly__
39 #include <sys/disk.h>
40 #endif
41 #endif
43 #ifdef _WIN32
44 #include <windows.h>
45 #endif
47 #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
49 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
50 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52 BlockDriverCompletionFunc *cb, void *opaque);
53 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
54 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
55 BlockDriverCompletionFunc *cb, void *opaque);
56 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
57 int64_t sector_num, int nb_sectors,
58 QEMUIOVector *iov);
59 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
60 int64_t sector_num, int nb_sectors,
61 QEMUIOVector *iov);
62 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
63 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
64 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
65 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
66 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
67 int64_t sector_num,
68 QEMUIOVector *qiov,
69 int nb_sectors,
70 BlockDriverCompletionFunc *cb,
71 void *opaque,
72 bool is_write);
73 static void coroutine_fn bdrv_co_do_rw(void *opaque);
75 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
76 QTAILQ_HEAD_INITIALIZER(bdrv_states);
78 static QLIST_HEAD(, BlockDriver) bdrv_drivers =
79 QLIST_HEAD_INITIALIZER(bdrv_drivers);
81 /* The device to use for VM snapshots */
82 static BlockDriverState *bs_snapshots;
84 /* If non-zero, use only whitelisted block drivers */
85 static int use_bdrv_whitelist;
87 #ifdef _WIN32
/* Return non-zero if filename begins with a DOS drive letter followed
 * by a colon (e.g. "c:"), zero otherwise. */
static int is_windows_drive_prefix(const char *filename)
{
    char c = filename[0];
    int is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');

    return is_letter && filename[1] == ':';
}
95 int is_windows_drive(const char *filename)
97 if (is_windows_drive_prefix(filename) &&
98 filename[2] == '\0')
99 return 1;
100 if (strstart(filename, "\\\\.\\", NULL) ||
101 strstart(filename, "//./", NULL))
102 return 1;
103 return 0;
105 #endif
/* check if the path starts with "<protocol>:" */
static int path_has_protocol(const char *path)
{
#ifdef _WIN32
    /* drive letters and device paths contain ':' but are not protocols */
    if (is_windows_drive(path) || is_windows_drive_prefix(path)) {
        return 0;
    }
#endif

    return strchr(path, ':') != NULL;
}
/* Return 1 if 'path' is absolute, skipping any "protocol:" prefix first. */
int path_is_absolute(const char *path)
{
    const char *p;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    p = strchr(path, ':');
    p = p ? p + 1 : path;

#ifdef _WIN32
    return (*p == '/' || *p == '\\');
#else
    return (*p == '/');
#endif
}
/* if filename is absolute, just copy it to dest. Otherwise, build a
   path to it by considering it is relative to base_path. URL are
   supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *p, *p1;
    int len;

    if (dest_size <= 0)
        return;
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
    } else {
        /* p: first char after a "protocol:" prefix, or base_path itself */
        p = strchr(base_path, ':');
        if (p)
            p++;
        else
            p = base_path;
        /* p1: last directory separator in base_path (if any) */
        p1 = strrchr(base_path, '/');
#ifdef _WIN32
        {
            const char *p2;
            p2 = strrchr(base_path, '\\');
            /* a backslash later than the last '/' wins on Windows */
            if (!p1 || p2 > p1)
                p1 = p2;
        }
#endif
        if (p1)
            p1++;
        else
            p1 = base_path;
        /* take whichever boundary is further into the string */
        if (p1 > p)
            p = p1;
        len = p - base_path;
        if (len > dest_size - 1)
            len = dest_size - 1;
        /* copy the directory part of base_path, then append filename */
        memcpy(dest, base_path, len);
        dest[len] = '\0';
        pstrcat(dest, dest_size, filename);
    }
}
/* Register a block driver with the block layer.  Drivers that lack the
 * coroutine interface get it emulated on top of their AIO interface;
 * drivers that also lack AIO get the AIO emulation layer as well. */
void bdrv_register(BlockDriver *bdrv)
{
    /* Block drivers without coroutine functions need emulation */
    if (!bdrv->bdrv_co_readv) {
        bdrv->bdrv_co_readv = bdrv_co_readv_em;
        bdrv->bdrv_co_writev = bdrv_co_writev_em;

        /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
         * the block driver lacks aio we need to emulate that too.
         */
        if (!bdrv->bdrv_aio_readv) {
            /* add AIO emulation layer */
            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
        }
    }

    QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
/* create a new block device (by default it is empty) */
BlockDriverState *bdrv_new(const char *device_name)
{
    BlockDriverState *bs;

    bs = g_malloc0(sizeof(BlockDriverState));
    pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
    /* anonymous BDSes (empty name) are not kept on the global list */
    if (device_name[0] != '\0') {
        QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
    }
    bdrv_iostatus_disable(bs);
    return bs;
}
218 BlockDriver *bdrv_find_format(const char *format_name)
220 BlockDriver *drv1;
221 QLIST_FOREACH(drv1, &bdrv_drivers, list) {
222 if (!strcmp(drv1->format_name, format_name)) {
223 return drv1;
226 return NULL;
229 static int bdrv_is_whitelisted(BlockDriver *drv)
231 static const char *whitelist[] = {
232 CONFIG_BDRV_WHITELIST
234 const char **p;
236 if (!whitelist[0])
237 return 1; /* no whitelist, anything goes */
239 for (p = whitelist; *p; p++) {
240 if (!strcmp(drv->format_name, *p)) {
241 return 1;
244 return 0;
247 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
249 BlockDriver *drv = bdrv_find_format(format_name);
250 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
253 int bdrv_create(BlockDriver *drv, const char* filename,
254 QEMUOptionParameter *options)
256 if (!drv->bdrv_create)
257 return -ENOTSUP;
259 return drv->bdrv_create(filename, options);
262 int bdrv_create_file(const char* filename, QEMUOptionParameter *options)
264 BlockDriver *drv;
266 drv = bdrv_find_protocol(filename);
267 if (drv == NULL) {
268 return -ENOENT;
271 return bdrv_create(drv, filename, options);
274 #ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    /* NOTE(review): the results of GetTempPath() and GetTempFileName()
     * are not checked; on failure 'filename' may be left unset.  'size'
     * is ignored on this platform -- callers must provide a buffer of at
     * least MAX_PATH bytes.  TODO confirm all callers do. */
    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
282 #else
/* Create a unique temporary file under $TMPDIR (default /tmp) and copy
 * its name into 'filename' (at most 'size' bytes).  mkstemp() creates
 * the file to avoid a name-guessing race; the descriptor is closed
 * immediately since only the name is needed. */
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    tmpdir = getenv("TMPDIR");
    if (!tmpdir)
        tmpdir = "/tmp";
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* Fix: mkstemp() can fail (e.g. read-only tmpdir) and return -1;
     * the original unconditionally called close(fd) on that -1. */
    if (fd >= 0) {
        close(fd);
    }
}
295 #endif
298 * Detect host devices. By convention, /dev/cdrom[N] is always
299 * recognized as a host CDROM.
301 static BlockDriver *find_hdev_driver(const char *filename)
303 int score_max = 0, score;
304 BlockDriver *drv = NULL, *d;
306 QLIST_FOREACH(d, &bdrv_drivers, list) {
307 if (d->bdrv_probe_device) {
308 score = d->bdrv_probe_device(filename);
309 if (score > score_max) {
310 score_max = score;
311 drv = d;
316 return drv;
/* Return the protocol driver for 'filename'.  Host device names are
 * probed first (see XXX below); paths without a "protocol:" prefix fall
 * back to the "file" driver.  Returns NULL if nothing matches. */
BlockDriver *bdrv_find_protocol(const char *filename)
{
    BlockDriver *drv1;
    char protocol[128];
    int len;
    const char *p;

    /* TODO Drivers without bdrv_file_open must be specified explicitly */

    /*
     * XXX(hch): we really should not let host device detection
     * override an explicit protocol specification, but moving this
     * later breaks access to device names with colons in them.
     * Thanks to the brain-dead persistent naming schemes on udev-
     * based Linux systems those actually are quite common.
     */
    drv1 = find_hdev_driver(filename);
    if (drv1) {
        return drv1;
    }

    if (!path_has_protocol(filename)) {
        return bdrv_find_format("file");
    }
    p = strchr(filename, ':');
    assert(p != NULL);
    /* copy the protocol prefix, truncated to fit and NUL-terminated */
    len = p - filename;
    if (len > sizeof(protocol) - 1)
        len = sizeof(protocol) - 1;
    memcpy(protocol, filename, len);
    protocol[len] = '\0';
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->protocol_name &&
            !strcmp(drv1->protocol_name, protocol)) {
            return drv1;
        }
    }
    return NULL;
}
/* Probe the image at 'filename' and store the best-scoring format
 * driver in *pdrv.  Returns 0 on success, -errno on failure (in which
 * case *pdrv is NULL or the fallback driver). */
static int find_image_format(const char *filename, BlockDriver **pdrv)
{
    int ret, score, score_max;
    BlockDriver *drv1, *drv;
    uint8_t buf[2048];
    BlockDriverState *bs;

    ret = bdrv_file_open(&bs, filename, 0);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    /* Return the raw BlockDriver * to scsi-generic devices or empty drives */
    if (bs->sg || !bdrv_is_inserted(bs)) {
        bdrv_delete(bs);
        drv = bdrv_find_format("raw");
        if (!drv) {
            ret = -ENOENT;
        }
        *pdrv = drv;
        return ret;
    }

    /* read the image header and let every driver score it */
    ret = bdrv_pread(bs, 0, buf, sizeof(buf));
    bdrv_delete(bs);
    if (ret < 0) {
        *pdrv = NULL;
        return ret;
    }

    score_max = 0;
    drv = NULL;
    QLIST_FOREACH(drv1, &bdrv_drivers, list) {
        if (drv1->bdrv_probe) {
            /* ret is the number of header bytes actually read */
            score = drv1->bdrv_probe(buf, ret, filename);
            if (score > score_max) {
                score_max = score;
                drv = drv1;
            }
        }
    }
    if (!drv) {
        ret = -ENOENT;
    }
    *pdrv = drv;
    return ret;
}
409 * Set the current 'total_sectors' value
411 static int refresh_total_sectors(BlockDriverState *bs, int64_t hint)
413 BlockDriver *drv = bs->drv;
415 /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
416 if (bs->sg)
417 return 0;
419 /* query actual device if possible, otherwise just trust the hint */
420 if (drv->bdrv_getlength) {
421 int64_t length = drv->bdrv_getlength(bs);
422 if (length < 0) {
423 return length;
425 hint = length >> BDRV_SECTOR_BITS;
428 bs->total_sectors = hint;
429 return 0;
433 * Set open flags for a given cache mode
435 * Return 0 on success, -1 if the cache mode was invalid.
437 int bdrv_parse_cache_flags(const char *mode, int *flags)
439 *flags &= ~BDRV_O_CACHE_MASK;
441 if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
442 *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
443 } else if (!strcmp(mode, "directsync")) {
444 *flags |= BDRV_O_NOCACHE;
445 } else if (!strcmp(mode, "writeback")) {
446 *flags |= BDRV_O_CACHE_WB;
447 } else if (!strcmp(mode, "unsafe")) {
448 *flags |= BDRV_O_CACHE_WB;
449 *flags |= BDRV_O_NO_FLUSH;
450 } else if (!strcmp(mode, "writethrough")) {
451 /* this is the default */
452 } else {
453 return -1;
456 return 0;
/*
 * Common part for opening disk images and files
 *
 * Resets per-open state on 'bs', allocates driver-private data, opens
 * the image (directly for protocol drivers, via bs->file otherwise) and
 * refreshes total_sectors.  Returns 0 on success, -errno on failure
 * (in which case bs is rolled back to the closed state).
 */
static int bdrv_open_common(BlockDriverState *bs, const char *filename,
    int flags, BlockDriver *drv)
{
    int ret, open_flags;

    assert(drv != NULL);

    trace_bdrv_open_common(bs, filename, flags, drv->format_name);

    /* reset fields that a previous open may have left behind */
    bs->file = NULL;
    bs->total_sectors = 0;
    bs->encrypted = 0;
    bs->valid_key = 0;
    bs->sg = 0;
    bs->open_flags = flags;
    bs->growable = 0;
    bs->buffer_alignment = 512;

    pstrcpy(bs->filename, sizeof(bs->filename), filename);
    bs->backing_file[0] = '\0';

    if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv)) {
        return -ENOTSUP;
    }

    bs->drv = drv;
    bs->opaque = g_malloc0(drv->instance_size);

    bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);

    /*
     * Clear flags that are internal to the block layer before opening the
     * image.
     */
    open_flags = flags & ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

    /*
     * Snapshots should be writable.
     */
    if (bs->is_temporary) {
        open_flags |= BDRV_O_RDWR;
    }

    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);

    /* Open the image, either directly or using a protocol */
    if (drv->bdrv_file_open) {
        ret = drv->bdrv_file_open(bs, filename, open_flags);
    } else {
        ret = bdrv_file_open(&bs->file, filename, open_flags);
        if (ret >= 0) {
            ret = drv->bdrv_open(bs, open_flags);
        }
    }

    if (ret < 0) {
        goto free_and_fail;
    }

    ret = refresh_total_sectors(bs, bs->total_sectors);
    if (ret < 0) {
        goto free_and_fail;
    }

#ifndef _WIN32
    /* temporary snapshot files are deleted on the spot; the open fd
     * keeps them alive until close */
    if (bs->is_temporary) {
        unlink(filename);
    }
#endif
    return 0;

free_and_fail:
    if (bs->file) {
        bdrv_delete(bs->file);
        bs->file = NULL;
    }
    g_free(bs->opaque);
    bs->opaque = NULL;
    bs->drv = NULL;
    return ret;
}
545 * Opens a file using a protocol (file, host_device, nbd, ...)
547 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
549 BlockDriverState *bs;
550 BlockDriver *drv;
551 int ret;
553 drv = bdrv_find_protocol(filename);
554 if (!drv) {
555 return -ENOENT;
558 bs = bdrv_new("");
559 ret = bdrv_open_common(bs, filename, flags, drv);
560 if (ret < 0) {
561 bdrv_delete(bs);
562 return ret;
564 bs->growable = 1;
565 *pbs = bs;
566 return 0;
/*
 * Opens a disk image (raw, qcow2, vmdk, ...)
 *
 * With BDRV_O_SNAPSHOT a temporary qcow2 overlay backed by 'filename'
 * is created and opened instead.  If 'drv' is NULL the format is
 * probed.  A backing file, if present and not suppressed by
 * BDRV_O_NO_BACKING, is opened read-only.  Returns 0 or -errno.
 */
int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
              BlockDriver *drv)
{
    int ret;
    char tmp_filename[PATH_MAX];

    if (flags & BDRV_O_SNAPSHOT) {
        BlockDriverState *bs1;
        int64_t total_size;
        int is_protocol = 0;
        BlockDriver *bdrv_qcow2;
        QEMUOptionParameter *options;
        char backing_filename[PATH_MAX];

        /* if snapshot, we create a temporary backing file and open it
           instead of opening 'filename' directly */

        /* open the image once only to learn its size */
        bs1 = bdrv_new("");
        ret = bdrv_open(bs1, filename, 0, drv);
        if (ret < 0) {
            bdrv_delete(bs1);
            return ret;
        }
        total_size = bdrv_getlength(bs1) & BDRV_SECTOR_MASK;

        if (bs1->drv && bs1->drv->protocol_name)
            is_protocol = 1;

        bdrv_delete(bs1);

        get_tmp_filename(tmp_filename, sizeof(tmp_filename));

        /* Real path is meaningless for protocols */
        if (is_protocol)
            snprintf(backing_filename, sizeof(backing_filename),
                     "%s", filename);
        else if (!realpath(filename, backing_filename))
            return -errno;

        /* create the temporary qcow2 overlay backed by 'filename' */
        bdrv_qcow2 = bdrv_find_format("qcow2");
        options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);

        set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size);
        set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
        if (drv) {
            set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
                drv->format_name);
        }

        ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
        free_option_parameters(options);
        if (ret < 0) {
            return ret;
        }

        /* from here on we open the overlay, not the original image */
        filename = tmp_filename;
        drv = bdrv_qcow2;
        bs->is_temporary = 1;
    }

    /* Find the right image format driver */
    if (!drv) {
        ret = find_image_format(filename, &drv);
    }

    if (!drv) {
        goto unlink_and_fail;
    }

    /* Open the image */
    ret = bdrv_open_common(bs, filename, flags, drv);
    if (ret < 0) {
        goto unlink_and_fail;
    }

    /* If there is a backing file, use it */
    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
        char backing_filename[PATH_MAX];
        int back_flags;
        BlockDriver *back_drv = NULL;

        bs->backing_hd = bdrv_new("");

        if (path_has_protocol(bs->backing_file)) {
            pstrcpy(backing_filename, sizeof(backing_filename),
                    bs->backing_file);
        } else {
            /* relative backing paths are resolved against the image */
            path_combine(backing_filename, sizeof(backing_filename),
                         filename, bs->backing_file);
        }

        if (bs->backing_format[0] != '\0') {
            back_drv = bdrv_find_format(bs->backing_format);
        }

        /* backing files always opened read-only */
        back_flags =
            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
        if (ret < 0) {
            bdrv_close(bs);
            return ret;
        }
        if (bs->is_temporary) {
            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
        } else {
            /* base image inherits from "parent" */
            bs->backing_hd->keep_read_only = bs->keep_read_only;
        }
    }

    /* encrypted images stay "ejected" until the key is set */
    if (!bdrv_key_required(bs)) {
        bdrv_dev_change_media_cb(bs, true);
    }

    return 0;

unlink_and_fail:
    if (bs->is_temporary) {
        unlink(filename);
    }
    return ret;
}
/* Close an open image: drop the backing file, call the driver's close
 * hook, free driver-private state and notify the attached device model
 * of the medium going away.  Safe to call on an already-closed bs. */
void bdrv_close(BlockDriverState *bs)
{
    if (bs->drv) {
        if (bs == bs_snapshots) {
            bs_snapshots = NULL;
        }
        if (bs->backing_hd) {
            bdrv_delete(bs->backing_hd);
            bs->backing_hd = NULL;
        }
        bs->drv->bdrv_close(bs);
        g_free(bs->opaque);
#ifdef _WIN32
        /* on Windows the temporary file could not be unlinked at open
         * time, so do it now */
        if (bs->is_temporary) {
            unlink(bs->filename);
        }
#endif
        bs->opaque = NULL;
        bs->drv = NULL;

        if (bs->file != NULL) {
            bdrv_close(bs->file);
        }

        bdrv_dev_change_media_cb(bs, false);
    }
}
/* Close every named block device; the BlockDriverStates themselves stay
 * allocated and on the global list. */
void bdrv_close_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_close(bs);
    }
}
735 /* make a BlockDriverState anonymous by removing from bdrv_state list.
736 Also, NULL terminate the device_name to prevent double remove */
737 void bdrv_make_anon(BlockDriverState *bs)
739 if (bs->device_name[0] != '\0') {
740 QTAILQ_REMOVE(&bdrv_states, bs, list);
742 bs->device_name[0] = '\0';
/* Destroy a BlockDriverState: detach it from the global list, close it,
 * recursively delete its protocol file and free the structure.  The bs
 * must no longer be attached to a device. */
void bdrv_delete(BlockDriverState *bs)
{
    assert(!bs->dev);

    /* remove from list, if necessary */
    bdrv_make_anon(bs);

    bdrv_close(bs);
    if (bs->file != NULL) {
        bdrv_delete(bs->file);
    }

    assert(bs != bs_snapshots);
    g_free(bs);
}
761 int bdrv_attach_dev(BlockDriverState *bs, void *dev)
762 /* TODO change to DeviceState *dev when all users are qdevified */
764 if (bs->dev) {
765 return -EBUSY;
767 bs->dev = dev;
768 bdrv_iostatus_reset(bs);
769 return 0;
772 /* TODO qdevified devices don't use this, remove when devices are qdevified */
773 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev)
775 if (bdrv_attach_dev(bs, dev) < 0) {
776 abort();
void bdrv_detach_dev(BlockDriverState *bs, void *dev)
/* TODO change to DeviceState *dev when all users are qdevified */
{
    assert(bs->dev == dev);
    bs->dev = NULL;
    bs->dev_ops = NULL;
    bs->dev_opaque = NULL;
    /* restore the default alignment the device may have raised */
    bs->buffer_alignment = 512;
}
/* TODO change to return DeviceState * when all users are qdevified */
void *bdrv_get_attached_dev(BlockDriverState *bs)
{
    return bs->dev;
}
/* Install the device model's callback table and its opaque pointer. */
void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops,
                      void *opaque)
{
    bs->dev_ops = ops;
    bs->dev_opaque = opaque;
    /* a device with removable media can no longer be the VM snapshot
     * target */
    if (bdrv_dev_has_removable_media(bs) && bs == bs_snapshots) {
        bs_snapshots = NULL;
    }
}
806 static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load)
808 if (bs->dev_ops && bs->dev_ops->change_media_cb) {
809 bs->dev_ops->change_media_cb(bs->dev_opaque, load);
813 bool bdrv_dev_has_removable_media(BlockDriverState *bs)
815 return !bs->dev || (bs->dev_ops && bs->dev_ops->change_media_cb);
818 bool bdrv_dev_is_tray_open(BlockDriverState *bs)
820 if (bs->dev_ops && bs->dev_ops->is_tray_open) {
821 return bs->dev_ops->is_tray_open(bs->dev_opaque);
823 return false;
826 static void bdrv_dev_resize_cb(BlockDriverState *bs)
828 if (bs->dev_ops && bs->dev_ops->resize_cb) {
829 bs->dev_ops->resize_cb(bs->dev_opaque);
833 bool bdrv_dev_is_medium_locked(BlockDriverState *bs)
835 if (bs->dev_ops && bs->dev_ops->is_medium_locked) {
836 return bs->dev_ops->is_medium_locked(bs->dev_opaque);
838 return false;
842 * Run consistency checks on an image
844 * Returns 0 if the check could be completed (it doesn't mean that the image is
845 * free of errors) or -errno when an internal error occurred. The results of the
846 * check are stored in res.
848 int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res)
850 if (bs->drv->bdrv_check == NULL) {
851 return -ENOTSUP;
854 memset(res, 0, sizeof(*res));
855 return bs->drv->bdrv_check(bs, res);
858 #define COMMIT_BUF_SECTORS 2048
860 /* commit COW file into the raw image */
861 int bdrv_commit(BlockDriverState *bs)
863 BlockDriver *drv = bs->drv;
864 BlockDriver *backing_drv;
865 int64_t sector, total_sectors;
866 int n, ro, open_flags;
867 int ret = 0, rw_ret = 0;
868 uint8_t *buf;
869 char filename[1024];
870 BlockDriverState *bs_rw, *bs_ro;
872 if (!drv)
873 return -ENOMEDIUM;
875 if (!bs->backing_hd) {
876 return -ENOTSUP;
879 if (bs->backing_hd->keep_read_only) {
880 return -EACCES;
883 backing_drv = bs->backing_hd->drv;
884 ro = bs->backing_hd->read_only;
885 strncpy(filename, bs->backing_hd->filename, sizeof(filename));
886 open_flags = bs->backing_hd->open_flags;
888 if (ro) {
889 /* re-open as RW */
890 bdrv_delete(bs->backing_hd);
891 bs->backing_hd = NULL;
892 bs_rw = bdrv_new("");
893 rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
894 backing_drv);
895 if (rw_ret < 0) {
896 bdrv_delete(bs_rw);
897 /* try to re-open read-only */
898 bs_ro = bdrv_new("");
899 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
900 backing_drv);
901 if (ret < 0) {
902 bdrv_delete(bs_ro);
903 /* drive not functional anymore */
904 bs->drv = NULL;
905 return ret;
907 bs->backing_hd = bs_ro;
908 return rw_ret;
910 bs->backing_hd = bs_rw;
913 total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
914 buf = g_malloc(COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
916 for (sector = 0; sector < total_sectors; sector += n) {
917 if (drv->bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n)) {
919 if (bdrv_read(bs, sector, buf, n) != 0) {
920 ret = -EIO;
921 goto ro_cleanup;
924 if (bdrv_write(bs->backing_hd, sector, buf, n) != 0) {
925 ret = -EIO;
926 goto ro_cleanup;
931 if (drv->bdrv_make_empty) {
932 ret = drv->bdrv_make_empty(bs);
933 bdrv_flush(bs);
937 * Make sure all data we wrote to the backing device is actually
938 * stable on disk.
940 if (bs->backing_hd)
941 bdrv_flush(bs->backing_hd);
943 ro_cleanup:
944 g_free(buf);
946 if (ro) {
947 /* re-open as RO */
948 bdrv_delete(bs->backing_hd);
949 bs->backing_hd = NULL;
950 bs_ro = bdrv_new("");
951 ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
952 backing_drv);
953 if (ret < 0) {
954 bdrv_delete(bs_ro);
955 /* drive not functional anymore */
956 bs->drv = NULL;
957 return ret;
959 bs->backing_hd = bs_ro;
960 bs->backing_hd->keep_read_only = 0;
963 return ret;
/* Commit every named COW image into its backing file; per-device errors
 * from bdrv_commit() are ignored. */
void bdrv_commit_all(void)
{
    BlockDriverState *bs;

    QTAILQ_FOREACH(bs, &bdrv_states, list) {
        bdrv_commit(bs);
    }
}
976 * Return values:
977 * 0 - success
978 * -EINVAL - backing format specified, but no file
979 * -ENOSPC - can't update the backing file because no space is left in the
980 * image file header
981 * -ENOTSUP - format driver doesn't support changing the backing file
983 int bdrv_change_backing_file(BlockDriverState *bs,
984 const char *backing_file, const char *backing_fmt)
986 BlockDriver *drv = bs->drv;
988 if (drv->bdrv_change_backing_file != NULL) {
989 return drv->bdrv_change_backing_file(bs, backing_file, backing_fmt);
990 } else {
991 return -ENOTSUP;
995 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
996 size_t size)
998 int64_t len;
1000 if (!bdrv_is_inserted(bs))
1001 return -ENOMEDIUM;
1003 if (bs->growable)
1004 return 0;
1006 len = bdrv_getlength(bs);
1008 if (offset < 0)
1009 return -EIO;
1011 if ((offset > len) || (len - offset < size))
1012 return -EIO;
1014 return 0;
1017 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
1018 int nb_sectors)
1020 return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
1021 nb_sectors * BDRV_SECTOR_SIZE);
/* State shared between bdrv_rw_co() and the coroutine that performs the
 * actual I/O (bdrv_rw_co_entry()). */
typedef struct RwCo {
    BlockDriverState *bs;
    int64_t sector_num;
    int nb_sectors;
    QEMUIOVector *qiov;
    bool is_write;   /* false: read request, true: write request */
    int ret;         /* result; NOT_DONE while the request is in flight */
} RwCo;
1033 static void coroutine_fn bdrv_rw_co_entry(void *opaque)
1035 RwCo *rwco = opaque;
1037 if (!rwco->is_write) {
1038 rwco->ret = bdrv_co_do_readv(rwco->bs, rwco->sector_num,
1039 rwco->nb_sectors, rwco->qiov);
1040 } else {
1041 rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
1042 rwco->nb_sectors, rwco->qiov);
/*
 * Process a synchronous request using coroutines
 *
 * Wraps 'buf' in a single-element iovec and runs the request in a
 * coroutine, blocking in qemu_aio_wait() until it completes when called
 * from outside coroutine context.
 */
static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
                      int nb_sectors, bool is_write)
{
    QEMUIOVector qiov;
    struct iovec iov = {
        .iov_base = (void *)buf,
        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
    };
    Coroutine *co;
    RwCo rwco = {
        .bs = bs,
        .sector_num = sector_num,
        .nb_sectors = nb_sectors,
        .qiov = &qiov,
        .is_write = is_write,
        .ret = NOT_DONE,
    };

    qemu_iovec_init_external(&qiov, &iov, 1);

    if (qemu_in_coroutine()) {
        /* Fast-path if already in coroutine context */
        bdrv_rw_co_entry(&rwco);
    } else {
        co = qemu_coroutine_create(bdrv_rw_co_entry);
        qemu_coroutine_enter(co, &rwco);
        /* pump the event loop until the coroutine publishes its result */
        while (rwco.ret == NOT_DONE) {
            qemu_aio_wait();
        }
    }
    return rwco.ret;
}
/* return < 0 if error. See bdrv_write() for the return codes */
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
              uint8_t *buf, int nb_sectors)
{
    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false);
}
/* Mark (dirty != 0) or clear the dirty-bitmap bits covering the sector
 * range, keeping bs->dirty_count in sync with the number of set bits. */
static void set_dirty_bitmap(BlockDriverState *bs, int64_t sector_num,
                             int nb_sectors, int dirty)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    /* convert the sector range to inclusive dirty-chunk indexes */
    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        /* locate the word and bit for this chunk */
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bs->dirty_bitmap[idx];
        if (dirty) {
            /* count only 0 -> 1 transitions */
            if (!(val & (1UL << bit))) {
                bs->dirty_count++;
                val |= 1UL << bit;
            }
        } else {
            /* count only 1 -> 0 transitions */
            if (val & (1UL << bit)) {
                bs->dirty_count--;
                val &= ~(1UL << bit);
            }
        }
        bs->dirty_bitmap[idx] = val;
    }
}
/* Return < 0 if error. Important errors are:
  -EIO         generic I/O error (may happen for all errors)
  -ENOMEDIUM   No media inserted.
  -EINVAL      Invalid sector number or nb_sectors
  -EACCES      Trying to write a read-only device
*/
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
               const uint8_t *buf, int nb_sectors)
{
    /* the const cast is safe: for writes the buffer is only read */
    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true);
}
/* Read 'count1' bytes at byte offset 'offset' into 'buf', emulating
 * byte-granular access on top of the sector API: an unaligned head and
 * tail go through a bounce sector, the middle is read in place.
 * Returns count1 on success, -errno on error. */
int bdrv_pread(BlockDriverState *bs, int64_t offset,
               void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first read to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), len);
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* read the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_read(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(buf, tmp_buf, count);
    }
    return count1;
}
/* Write 'count1' bytes from 'buf' at byte offset 'offset', emulating
 * byte-granular access on top of the sector API.  Unaligned head and
 * tail sectors are read-modify-written through a bounce buffer; the
 * aligned middle is written in place.  Returns count1 or -errno. */
int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
                const void *buf, int count1)
{
    uint8_t tmp_buf[BDRV_SECTOR_SIZE];
    int len, nb_sectors, count;
    int64_t sector_num;
    int ret;

    count = count1;
    /* first write to align to sector start */
    len = (BDRV_SECTOR_SIZE - offset) & (BDRV_SECTOR_SIZE - 1);
    if (len > count)
        len = count;
    sector_num = offset >> BDRV_SECTOR_BITS;
    if (len > 0) {
        /* read-modify-write the partial head sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf + (offset & (BDRV_SECTOR_SIZE - 1)), buf, len);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        count -= len;
        if (count == 0)
            return count1;
        sector_num++;
        buf += len;
    }

    /* write the sectors "in place" */
    nb_sectors = count >> BDRV_SECTOR_BITS;
    if (nb_sectors > 0) {
        if ((ret = bdrv_write(bs, sector_num, buf, nb_sectors)) < 0)
            return ret;
        sector_num += nb_sectors;
        len = nb_sectors << BDRV_SECTOR_BITS;
        buf += len;
        count -= len;
    }

    /* add data from the last sector */
    if (count > 0) {
        /* read-modify-write the partial tail sector */
        if ((ret = bdrv_read(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
        memcpy(tmp_buf, buf, count);
        if ((ret = bdrv_write(bs, sector_num, tmp_buf, 1)) < 0)
            return ret;
    }
    return count1;
}
1224 * Writes to the file and ensures that no writes are reordered across this
1225 * request (acts as a barrier)
1227 * Returns 0 on success, -errno in error cases.
1229 int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
1230 const void *buf, int count)
1232 int ret;
1234 ret = bdrv_pwrite(bs, offset, buf, count);
1235 if (ret < 0) {
1236 return ret;
1239 /* No flush needed for cache modes that use O_DSYNC */
1240 if ((bs->open_flags & BDRV_O_CACHE_WB) != 0) {
1241 bdrv_flush(bs);
1244 return 0;
1248 * Handle a read request in coroutine context
1250 static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
1251 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1253 BlockDriver *drv = bs->drv;
1255 if (!drv) {
1256 return -ENOMEDIUM;
1258 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1259 return -EIO;
1262 return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
1265 int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
1266 int nb_sectors, QEMUIOVector *qiov)
1268 trace_bdrv_co_readv(bs, sector_num, nb_sectors);
1270 return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov);
1274 * Handle a write request in coroutine context
1276 static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
1277 int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
1279 BlockDriver *drv = bs->drv;
1280 int ret;
1282 if (!bs->drv) {
1283 return -ENOMEDIUM;
1285 if (bs->read_only) {
1286 return -EACCES;
1288 if (bdrv_check_request(bs, sector_num, nb_sectors)) {
1289 return -EIO;
1292 ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
1294 if (bs->dirty_bitmap) {
1295 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
1298 if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
1299 bs->wr_highest_sector = sector_num + nb_sectors - 1;
1302 return ret;
1305 int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
1306 int nb_sectors, QEMUIOVector *qiov)
1308 trace_bdrv_co_writev(bs, sector_num, nb_sectors);
1310 return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
1314 * Truncate file to 'offset' bytes (needed only for file protocols)
1316 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
1318 BlockDriver *drv = bs->drv;
1319 int ret;
1320 if (!drv)
1321 return -ENOMEDIUM;
1322 if (!drv->bdrv_truncate)
1323 return -ENOTSUP;
1324 if (bs->read_only)
1325 return -EACCES;
1326 if (bdrv_in_use(bs))
1327 return -EBUSY;
1328 ret = drv->bdrv_truncate(bs, offset);
1329 if (ret == 0) {
1330 ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
1331 bdrv_dev_resize_cb(bs);
1333 return ret;
1337 * Length of a allocated file in bytes. Sparse files are counted by actual
1338 * allocated space. Return < 0 if error or unknown.
1340 int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
1342 BlockDriver *drv = bs->drv;
1343 if (!drv) {
1344 return -ENOMEDIUM;
1346 if (drv->bdrv_get_allocated_file_size) {
1347 return drv->bdrv_get_allocated_file_size(bs);
1349 if (bs->file) {
1350 return bdrv_get_allocated_file_size(bs->file);
1352 return -ENOTSUP;
1356 * Length of a file in bytes. Return < 0 if error or unknown.
1358 int64_t bdrv_getlength(BlockDriverState *bs)
1360 BlockDriver *drv = bs->drv;
1361 if (!drv)
1362 return -ENOMEDIUM;
1364 if (bs->growable || bdrv_dev_has_removable_media(bs)) {
1365 if (drv->bdrv_getlength) {
1366 return drv->bdrv_getlength(bs);
1369 return bs->total_sectors * BDRV_SECTOR_SIZE;
1372 /* return 0 as number of sectors if no device present or error */
1373 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
1375 int64_t length;
1376 length = bdrv_getlength(bs);
1377 if (length < 0)
1378 length = 0;
1379 else
1380 length = length >> BDRV_SECTOR_BITS;
1381 *nb_sectors_ptr = length;
1384 struct partition {
1385 uint8_t boot_ind; /* 0x80 - active */
1386 uint8_t head; /* starting head */
1387 uint8_t sector; /* starting sector */
1388 uint8_t cyl; /* starting cylinder */
1389 uint8_t sys_ind; /* What partition type */
1390 uint8_t end_head; /* end head */
1391 uint8_t end_sector; /* end sector */
1392 uint8_t end_cyl; /* end cylinder */
1393 uint32_t start_sect; /* starting sector counting from 0 */
1394 uint32_t nr_sects; /* nr of sectors in partition */
1395 } QEMU_PACKED;
1397 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
1398 static int guess_disk_lchs(BlockDriverState *bs,
1399 int *pcylinders, int *pheads, int *psectors)
1401 uint8_t buf[BDRV_SECTOR_SIZE];
1402 int ret, i, heads, sectors, cylinders;
1403 struct partition *p;
1404 uint32_t nr_sects;
1405 uint64_t nb_sectors;
1407 bdrv_get_geometry(bs, &nb_sectors);
1409 ret = bdrv_read(bs, 0, buf, 1);
1410 if (ret < 0)
1411 return -1;
1412 /* test msdos magic */
1413 if (buf[510] != 0x55 || buf[511] != 0xaa)
1414 return -1;
1415 for(i = 0; i < 4; i++) {
1416 p = ((struct partition *)(buf + 0x1be)) + i;
1417 nr_sects = le32_to_cpu(p->nr_sects);
1418 if (nr_sects && p->end_head) {
1419 /* We make the assumption that the partition terminates on
1420 a cylinder boundary */
1421 heads = p->end_head + 1;
1422 sectors = p->end_sector & 63;
1423 if (sectors == 0)
1424 continue;
1425 cylinders = nb_sectors / (heads * sectors);
1426 if (cylinders < 1 || cylinders > 16383)
1427 continue;
1428 *pheads = heads;
1429 *psectors = sectors;
1430 *pcylinders = cylinders;
1431 #if 0
1432 printf("guessed geometry: LCHS=%d %d %d\n",
1433 cylinders, heads, sectors);
1434 #endif
1435 return 0;
1438 return -1;
1441 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
1443 int translation, lba_detected = 0;
1444 int cylinders, heads, secs;
1445 uint64_t nb_sectors;
1447 /* if a geometry hint is available, use it */
1448 bdrv_get_geometry(bs, &nb_sectors);
1449 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
1450 translation = bdrv_get_translation_hint(bs);
1451 if (cylinders != 0) {
1452 *pcyls = cylinders;
1453 *pheads = heads;
1454 *psecs = secs;
1455 } else {
1456 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
1457 if (heads > 16) {
1458 /* if heads > 16, it means that a BIOS LBA
1459 translation was active, so the default
1460 hardware geometry is OK */
1461 lba_detected = 1;
1462 goto default_geometry;
1463 } else {
1464 *pcyls = cylinders;
1465 *pheads = heads;
1466 *psecs = secs;
1467 /* disable any translation to be in sync with
1468 the logical geometry */
1469 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
1470 bdrv_set_translation_hint(bs,
1471 BIOS_ATA_TRANSLATION_NONE);
1474 } else {
1475 default_geometry:
1476 /* if no geometry, use a standard physical disk geometry */
1477 cylinders = nb_sectors / (16 * 63);
1479 if (cylinders > 16383)
1480 cylinders = 16383;
1481 else if (cylinders < 2)
1482 cylinders = 2;
1483 *pcyls = cylinders;
1484 *pheads = 16;
1485 *psecs = 63;
1486 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
1487 if ((*pcyls * *pheads) <= 131072) {
1488 bdrv_set_translation_hint(bs,
1489 BIOS_ATA_TRANSLATION_LARGE);
1490 } else {
1491 bdrv_set_translation_hint(bs,
1492 BIOS_ATA_TRANSLATION_LBA);
1496 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
1500 void bdrv_set_geometry_hint(BlockDriverState *bs,
1501 int cyls, int heads, int secs)
1503 bs->cyls = cyls;
1504 bs->heads = heads;
1505 bs->secs = secs;
1508 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
1510 bs->translation = translation;
1513 void bdrv_get_geometry_hint(BlockDriverState *bs,
1514 int *pcyls, int *pheads, int *psecs)
1516 *pcyls = bs->cyls;
1517 *pheads = bs->heads;
1518 *psecs = bs->secs;
1521 /* Recognize floppy formats */
1522 typedef struct FDFormat {
1523 FDriveType drive;
1524 uint8_t last_sect;
1525 uint8_t max_track;
1526 uint8_t max_head;
1527 } FDFormat;
1529 static const FDFormat fd_formats[] = {
1530 /* First entry is default format */
1531 /* 1.44 MB 3"1/2 floppy disks */
1532 { FDRIVE_DRV_144, 18, 80, 1, },
1533 { FDRIVE_DRV_144, 20, 80, 1, },
1534 { FDRIVE_DRV_144, 21, 80, 1, },
1535 { FDRIVE_DRV_144, 21, 82, 1, },
1536 { FDRIVE_DRV_144, 21, 83, 1, },
1537 { FDRIVE_DRV_144, 22, 80, 1, },
1538 { FDRIVE_DRV_144, 23, 80, 1, },
1539 { FDRIVE_DRV_144, 24, 80, 1, },
1540 /* 2.88 MB 3"1/2 floppy disks */
1541 { FDRIVE_DRV_288, 36, 80, 1, },
1542 { FDRIVE_DRV_288, 39, 80, 1, },
1543 { FDRIVE_DRV_288, 40, 80, 1, },
1544 { FDRIVE_DRV_288, 44, 80, 1, },
1545 { FDRIVE_DRV_288, 48, 80, 1, },
1546 /* 720 kB 3"1/2 floppy disks */
1547 { FDRIVE_DRV_144, 9, 80, 1, },
1548 { FDRIVE_DRV_144, 10, 80, 1, },
1549 { FDRIVE_DRV_144, 10, 82, 1, },
1550 { FDRIVE_DRV_144, 10, 83, 1, },
1551 { FDRIVE_DRV_144, 13, 80, 1, },
1552 { FDRIVE_DRV_144, 14, 80, 1, },
1553 /* 1.2 MB 5"1/4 floppy disks */
1554 { FDRIVE_DRV_120, 15, 80, 1, },
1555 { FDRIVE_DRV_120, 18, 80, 1, },
1556 { FDRIVE_DRV_120, 18, 82, 1, },
1557 { FDRIVE_DRV_120, 18, 83, 1, },
1558 { FDRIVE_DRV_120, 20, 80, 1, },
1559 /* 720 kB 5"1/4 floppy disks */
1560 { FDRIVE_DRV_120, 9, 80, 1, },
1561 { FDRIVE_DRV_120, 11, 80, 1, },
1562 /* 360 kB 5"1/4 floppy disks */
1563 { FDRIVE_DRV_120, 9, 40, 1, },
1564 { FDRIVE_DRV_120, 9, 40, 0, },
1565 { FDRIVE_DRV_120, 10, 41, 1, },
1566 { FDRIVE_DRV_120, 10, 42, 1, },
1567 /* 320 kB 5"1/4 floppy disks */
1568 { FDRIVE_DRV_120, 8, 40, 1, },
1569 { FDRIVE_DRV_120, 8, 40, 0, },
1570 /* 360 kB must match 5"1/4 better than 3"1/2... */
1571 { FDRIVE_DRV_144, 9, 80, 0, },
1572 /* end */
1573 { FDRIVE_DRV_NONE, -1, -1, 0, },
1576 void bdrv_get_floppy_geometry_hint(BlockDriverState *bs, int *nb_heads,
1577 int *max_track, int *last_sect,
1578 FDriveType drive_in, FDriveType *drive)
1580 const FDFormat *parse;
1581 uint64_t nb_sectors, size;
1582 int i, first_match, match;
1584 bdrv_get_geometry_hint(bs, nb_heads, max_track, last_sect);
1585 if (*nb_heads != 0 && *max_track != 0 && *last_sect != 0) {
1586 /* User defined disk */
1587 } else {
1588 bdrv_get_geometry(bs, &nb_sectors);
1589 match = -1;
1590 first_match = -1;
1591 for (i = 0; ; i++) {
1592 parse = &fd_formats[i];
1593 if (parse->drive == FDRIVE_DRV_NONE) {
1594 break;
1596 if (drive_in == parse->drive ||
1597 drive_in == FDRIVE_DRV_NONE) {
1598 size = (parse->max_head + 1) * parse->max_track *
1599 parse->last_sect;
1600 if (nb_sectors == size) {
1601 match = i;
1602 break;
1604 if (first_match == -1) {
1605 first_match = i;
1609 if (match == -1) {
1610 if (first_match == -1) {
1611 match = 1;
1612 } else {
1613 match = first_match;
1615 parse = &fd_formats[match];
1617 *nb_heads = parse->max_head + 1;
1618 *max_track = parse->max_track;
1619 *last_sect = parse->last_sect;
1620 *drive = parse->drive;
1624 int bdrv_get_translation_hint(BlockDriverState *bs)
1626 return bs->translation;
1629 void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
1630 BlockErrorAction on_write_error)
1632 bs->on_read_error = on_read_error;
1633 bs->on_write_error = on_write_error;
1636 BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
1638 return is_read ? bs->on_read_error : bs->on_write_error;
1641 int bdrv_is_read_only(BlockDriverState *bs)
1643 return bs->read_only;
1646 int bdrv_is_sg(BlockDriverState *bs)
1648 return bs->sg;
1651 int bdrv_enable_write_cache(BlockDriverState *bs)
1653 return bs->enable_write_cache;
1656 int bdrv_is_encrypted(BlockDriverState *bs)
1658 if (bs->backing_hd && bs->backing_hd->encrypted)
1659 return 1;
1660 return bs->encrypted;
1663 int bdrv_key_required(BlockDriverState *bs)
1665 BlockDriverState *backing_hd = bs->backing_hd;
1667 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
1668 return 1;
1669 return (bs->encrypted && !bs->valid_key);
1672 int bdrv_set_key(BlockDriverState *bs, const char *key)
1674 int ret;
1675 if (bs->backing_hd && bs->backing_hd->encrypted) {
1676 ret = bdrv_set_key(bs->backing_hd, key);
1677 if (ret < 0)
1678 return ret;
1679 if (!bs->encrypted)
1680 return 0;
1682 if (!bs->encrypted) {
1683 return -EINVAL;
1684 } else if (!bs->drv || !bs->drv->bdrv_set_key) {
1685 return -ENOMEDIUM;
1687 ret = bs->drv->bdrv_set_key(bs, key);
1688 if (ret < 0) {
1689 bs->valid_key = 0;
1690 } else if (!bs->valid_key) {
1691 bs->valid_key = 1;
1692 /* call the change callback now, we skipped it on open */
1693 bdrv_dev_change_media_cb(bs, true);
1695 return ret;
1698 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1700 if (!bs->drv) {
1701 buf[0] = '\0';
1702 } else {
1703 pstrcpy(buf, buf_size, bs->drv->format_name);
1707 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1708 void *opaque)
1710 BlockDriver *drv;
1712 QLIST_FOREACH(drv, &bdrv_drivers, list) {
1713 it(opaque, drv->format_name);
1717 BlockDriverState *bdrv_find(const char *name)
1719 BlockDriverState *bs;
1721 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1722 if (!strcmp(name, bs->device_name)) {
1723 return bs;
1726 return NULL;
1729 BlockDriverState *bdrv_next(BlockDriverState *bs)
1731 if (!bs) {
1732 return QTAILQ_FIRST(&bdrv_states);
1734 return QTAILQ_NEXT(bs, list);
1737 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1739 BlockDriverState *bs;
1741 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1742 it(opaque, bs);
1746 const char *bdrv_get_device_name(BlockDriverState *bs)
1748 return bs->device_name;
1751 void bdrv_flush_all(void)
1753 BlockDriverState *bs;
1755 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1756 if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
1757 bdrv_flush(bs);
1762 int bdrv_has_zero_init(BlockDriverState *bs)
1764 assert(bs->drv);
1766 if (bs->drv->bdrv_has_zero_init) {
1767 return bs->drv->bdrv_has_zero_init(bs);
1770 return 1;
1774 * Returns true iff the specified sector is present in the disk image. Drivers
1775 * not implementing the functionality are assumed to not support backing files,
1776 * hence all their sectors are reported as allocated.
1778 * 'pnum' is set to the number of sectors (including and immediately following
1779 * the specified sector) that are known to be in the same
1780 * allocated/unallocated state.
1782 * 'nb_sectors' is the max value 'pnum' should be set to.
1784 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1785 int *pnum)
1787 int64_t n;
1788 if (!bs->drv->bdrv_is_allocated) {
1789 if (sector_num >= bs->total_sectors) {
1790 *pnum = 0;
1791 return 0;
1793 n = bs->total_sectors - sector_num;
1794 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1795 return 1;
1797 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1800 void bdrv_mon_event(const BlockDriverState *bdrv,
1801 BlockMonEventAction action, int is_read)
1803 QObject *data;
1804 const char *action_str;
1806 switch (action) {
1807 case BDRV_ACTION_REPORT:
1808 action_str = "report";
1809 break;
1810 case BDRV_ACTION_IGNORE:
1811 action_str = "ignore";
1812 break;
1813 case BDRV_ACTION_STOP:
1814 action_str = "stop";
1815 break;
1816 default:
1817 abort();
1820 data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }",
1821 bdrv->device_name,
1822 action_str,
1823 is_read ? "read" : "write");
1824 monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
1826 qobject_decref(data);
1829 static void bdrv_print_dict(QObject *obj, void *opaque)
1831 QDict *bs_dict;
1832 Monitor *mon = opaque;
1834 bs_dict = qobject_to_qdict(obj);
1836 monitor_printf(mon, "%s: removable=%d",
1837 qdict_get_str(bs_dict, "device"),
1838 qdict_get_bool(bs_dict, "removable"));
1840 if (qdict_get_bool(bs_dict, "removable")) {
1841 monitor_printf(mon, " locked=%d", qdict_get_bool(bs_dict, "locked"));
1842 monitor_printf(mon, " tray-open=%d",
1843 qdict_get_bool(bs_dict, "tray-open"));
1846 if (qdict_haskey(bs_dict, "io-status")) {
1847 monitor_printf(mon, " io-status=%s", qdict_get_str(bs_dict, "io-status"));
1850 if (qdict_haskey(bs_dict, "inserted")) {
1851 QDict *qdict = qobject_to_qdict(qdict_get(bs_dict, "inserted"));
1853 monitor_printf(mon, " file=");
1854 monitor_print_filename(mon, qdict_get_str(qdict, "file"));
1855 if (qdict_haskey(qdict, "backing_file")) {
1856 monitor_printf(mon, " backing_file=");
1857 monitor_print_filename(mon, qdict_get_str(qdict, "backing_file"));
1859 monitor_printf(mon, " ro=%d drv=%s encrypted=%d",
1860 qdict_get_bool(qdict, "ro"),
1861 qdict_get_str(qdict, "drv"),
1862 qdict_get_bool(qdict, "encrypted"));
1863 } else {
1864 monitor_printf(mon, " [not inserted]");
1867 monitor_printf(mon, "\n");
1870 void bdrv_info_print(Monitor *mon, const QObject *data)
1872 qlist_iter(qobject_to_qlist(data), bdrv_print_dict, mon);
1875 static const char *const io_status_name[BDRV_IOS_MAX] = {
1876 [BDRV_IOS_OK] = "ok",
1877 [BDRV_IOS_FAILED] = "failed",
1878 [BDRV_IOS_ENOSPC] = "nospace",
1881 void bdrv_info(Monitor *mon, QObject **ret_data)
1883 QList *bs_list;
1884 BlockDriverState *bs;
1886 bs_list = qlist_new();
1888 QTAILQ_FOREACH(bs, &bdrv_states, list) {
1889 QObject *bs_obj;
1890 QDict *bs_dict;
1892 bs_obj = qobject_from_jsonf("{ 'device': %s, 'type': 'unknown', "
1893 "'removable': %i, 'locked': %i }",
1894 bs->device_name,
1895 bdrv_dev_has_removable_media(bs),
1896 bdrv_dev_is_medium_locked(bs));
1897 bs_dict = qobject_to_qdict(bs_obj);
1899 if (bdrv_dev_has_removable_media(bs)) {
1900 qdict_put(bs_dict, "tray-open",
1901 qbool_from_int(bdrv_dev_is_tray_open(bs)));
1904 if (bdrv_iostatus_is_enabled(bs)) {
1905 qdict_put(bs_dict, "io-status",
1906 qstring_from_str(io_status_name[bs->iostatus]));
1909 if (bs->drv) {
1910 QObject *obj;
1912 obj = qobject_from_jsonf("{ 'file': %s, 'ro': %i, 'drv': %s, "
1913 "'encrypted': %i }",
1914 bs->filename, bs->read_only,
1915 bs->drv->format_name,
1916 bdrv_is_encrypted(bs));
1917 if (bs->backing_file[0] != '\0') {
1918 QDict *qdict = qobject_to_qdict(obj);
1919 qdict_put(qdict, "backing_file",
1920 qstring_from_str(bs->backing_file));
1923 qdict_put_obj(bs_dict, "inserted", obj);
1925 qlist_append_obj(bs_list, bs_obj);
1928 *ret_data = QOBJECT(bs_list);
1931 static void bdrv_stats_iter(QObject *data, void *opaque)
1933 QDict *qdict;
1934 Monitor *mon = opaque;
1936 qdict = qobject_to_qdict(data);
1937 monitor_printf(mon, "%s:", qdict_get_str(qdict, "device"));
1939 qdict = qobject_to_qdict(qdict_get(qdict, "stats"));
1940 monitor_printf(mon, " rd_bytes=%" PRId64
1941 " wr_bytes=%" PRId64
1942 " rd_operations=%" PRId64
1943 " wr_operations=%" PRId64
1944 " flush_operations=%" PRId64
1945 " wr_total_time_ns=%" PRId64
1946 " rd_total_time_ns=%" PRId64
1947 " flush_total_time_ns=%" PRId64
1948 "\n",
1949 qdict_get_int(qdict, "rd_bytes"),
1950 qdict_get_int(qdict, "wr_bytes"),
1951 qdict_get_int(qdict, "rd_operations"),
1952 qdict_get_int(qdict, "wr_operations"),
1953 qdict_get_int(qdict, "flush_operations"),
1954 qdict_get_int(qdict, "wr_total_time_ns"),
1955 qdict_get_int(qdict, "rd_total_time_ns"),
1956 qdict_get_int(qdict, "flush_total_time_ns"));
1959 void bdrv_stats_print(Monitor *mon, const QObject *data)
1961 qlist_iter(qobject_to_qlist(data), bdrv_stats_iter, mon);
1964 static QObject* bdrv_info_stats_bs(BlockDriverState *bs)
1966 QObject *res;
1967 QDict *dict;
1969 res = qobject_from_jsonf("{ 'stats': {"
1970 "'rd_bytes': %" PRId64 ","
1971 "'wr_bytes': %" PRId64 ","
1972 "'rd_operations': %" PRId64 ","
1973 "'wr_operations': %" PRId64 ","
1974 "'wr_highest_offset': %" PRId64 ","
1975 "'flush_operations': %" PRId64 ","
1976 "'wr_total_time_ns': %" PRId64 ","
1977 "'rd_total_time_ns': %" PRId64 ","
1978 "'flush_total_time_ns': %" PRId64
1979 "} }",
1980 bs->nr_bytes[BDRV_ACCT_READ],
1981 bs->nr_bytes[BDRV_ACCT_WRITE],
1982 bs->nr_ops[BDRV_ACCT_READ],
1983 bs->nr_ops[BDRV_ACCT_WRITE],
1984 bs->wr_highest_sector *
1985 (uint64_t)BDRV_SECTOR_SIZE,
1986 bs->nr_ops[BDRV_ACCT_FLUSH],
1987 bs->total_time_ns[BDRV_ACCT_WRITE],
1988 bs->total_time_ns[BDRV_ACCT_READ],
1989 bs->total_time_ns[BDRV_ACCT_FLUSH]);
1990 dict = qobject_to_qdict(res);
1992 if (*bs->device_name) {
1993 qdict_put(dict, "device", qstring_from_str(bs->device_name));
1996 if (bs->file) {
1997 QObject *parent = bdrv_info_stats_bs(bs->file);
1998 qdict_put_obj(dict, "parent", parent);
2001 return res;
2004 void bdrv_info_stats(Monitor *mon, QObject **ret_data)
2006 QObject *obj;
2007 QList *devices;
2008 BlockDriverState *bs;
2010 devices = qlist_new();
2012 QTAILQ_FOREACH(bs, &bdrv_states, list) {
2013 obj = bdrv_info_stats_bs(bs);
2014 qlist_append_obj(devices, obj);
2017 *ret_data = QOBJECT(devices);
2020 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
2022 if (bs->backing_hd && bs->backing_hd->encrypted)
2023 return bs->backing_file;
2024 else if (bs->encrypted)
2025 return bs->filename;
2026 else
2027 return NULL;
2030 void bdrv_get_backing_filename(BlockDriverState *bs,
2031 char *filename, int filename_size)
2033 pstrcpy(filename, filename_size, bs->backing_file);
2036 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
2037 const uint8_t *buf, int nb_sectors)
2039 BlockDriver *drv = bs->drv;
2040 if (!drv)
2041 return -ENOMEDIUM;
2042 if (!drv->bdrv_write_compressed)
2043 return -ENOTSUP;
2044 if (bdrv_check_request(bs, sector_num, nb_sectors))
2045 return -EIO;
2047 if (bs->dirty_bitmap) {
2048 set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
2051 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
2054 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
2056 BlockDriver *drv = bs->drv;
2057 if (!drv)
2058 return -ENOMEDIUM;
2059 if (!drv->bdrv_get_info)
2060 return -ENOTSUP;
2061 memset(bdi, 0, sizeof(*bdi));
2062 return drv->bdrv_get_info(bs, bdi);
2065 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
2066 int64_t pos, int size)
2068 BlockDriver *drv = bs->drv;
2069 if (!drv)
2070 return -ENOMEDIUM;
2071 if (drv->bdrv_save_vmstate)
2072 return drv->bdrv_save_vmstate(bs, buf, pos, size);
2073 if (bs->file)
2074 return bdrv_save_vmstate(bs->file, buf, pos, size);
2075 return -ENOTSUP;
2078 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2079 int64_t pos, int size)
2081 BlockDriver *drv = bs->drv;
2082 if (!drv)
2083 return -ENOMEDIUM;
2084 if (drv->bdrv_load_vmstate)
2085 return drv->bdrv_load_vmstate(bs, buf, pos, size);
2086 if (bs->file)
2087 return bdrv_load_vmstate(bs->file, buf, pos, size);
2088 return -ENOTSUP;
2091 void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
2093 BlockDriver *drv = bs->drv;
2095 if (!drv || !drv->bdrv_debug_event) {
2096 return;
2099 return drv->bdrv_debug_event(bs, event);
2103 /**************************************************************/
2104 /* handling of snapshots */
2106 int bdrv_can_snapshot(BlockDriverState *bs)
2108 BlockDriver *drv = bs->drv;
2109 if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
2110 return 0;
2113 if (!drv->bdrv_snapshot_create) {
2114 if (bs->file != NULL) {
2115 return bdrv_can_snapshot(bs->file);
2117 return 0;
2120 return 1;
2123 int bdrv_is_snapshot(BlockDriverState *bs)
2125 return !!(bs->open_flags & BDRV_O_SNAPSHOT);
2128 BlockDriverState *bdrv_snapshots(void)
2130 BlockDriverState *bs;
2132 if (bs_snapshots) {
2133 return bs_snapshots;
2136 bs = NULL;
2137 while ((bs = bdrv_next(bs))) {
2138 if (bdrv_can_snapshot(bs)) {
2139 bs_snapshots = bs;
2140 return bs;
2143 return NULL;
2146 int bdrv_snapshot_create(BlockDriverState *bs,
2147 QEMUSnapshotInfo *sn_info)
2149 BlockDriver *drv = bs->drv;
2150 if (!drv)
2151 return -ENOMEDIUM;
2152 if (drv->bdrv_snapshot_create)
2153 return drv->bdrv_snapshot_create(bs, sn_info);
2154 if (bs->file)
2155 return bdrv_snapshot_create(bs->file, sn_info);
2156 return -ENOTSUP;
2159 int bdrv_snapshot_goto(BlockDriverState *bs,
2160 const char *snapshot_id)
2162 BlockDriver *drv = bs->drv;
2163 int ret, open_ret;
2165 if (!drv)
2166 return -ENOMEDIUM;
2167 if (drv->bdrv_snapshot_goto)
2168 return drv->bdrv_snapshot_goto(bs, snapshot_id);
2170 if (bs->file) {
2171 drv->bdrv_close(bs);
2172 ret = bdrv_snapshot_goto(bs->file, snapshot_id);
2173 open_ret = drv->bdrv_open(bs, bs->open_flags);
2174 if (open_ret < 0) {
2175 bdrv_delete(bs->file);
2176 bs->drv = NULL;
2177 return open_ret;
2179 return ret;
2182 return -ENOTSUP;
2185 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
2187 BlockDriver *drv = bs->drv;
2188 if (!drv)
2189 return -ENOMEDIUM;
2190 if (drv->bdrv_snapshot_delete)
2191 return drv->bdrv_snapshot_delete(bs, snapshot_id);
2192 if (bs->file)
2193 return bdrv_snapshot_delete(bs->file, snapshot_id);
2194 return -ENOTSUP;
2197 int bdrv_snapshot_list(BlockDriverState *bs,
2198 QEMUSnapshotInfo **psn_info)
2200 BlockDriver *drv = bs->drv;
2201 if (!drv)
2202 return -ENOMEDIUM;
2203 if (drv->bdrv_snapshot_list)
2204 return drv->bdrv_snapshot_list(bs, psn_info);
2205 if (bs->file)
2206 return bdrv_snapshot_list(bs->file, psn_info);
2207 return -ENOTSUP;
2210 int bdrv_snapshot_load_tmp(BlockDriverState *bs,
2211 const char *snapshot_name)
2213 BlockDriver *drv = bs->drv;
2214 if (!drv) {
2215 return -ENOMEDIUM;
2217 if (!bs->read_only) {
2218 return -EINVAL;
2220 if (drv->bdrv_snapshot_load_tmp) {
2221 return drv->bdrv_snapshot_load_tmp(bs, snapshot_name);
2223 return -ENOTSUP;
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' as a human-readable string.  Values up to 999
 * are printed exactly; larger values use a K/M/G/T suffix with base 1024:
 * one decimal place below 10 units, otherwise a rounded integer.
 * Returns 'buf' for convenient use in printf-style arguments.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t base;
    int i;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
    } else {
        base = 1024;
        for(i = 0; i < NB_SUFFIXES; i++) {
            if (size < (10 * base)) {
                snprintf(buf, buf_size, "%0.1f%c",
                         (double)size / base,
                         suffixes[i]);
                break;
            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
                /* round to nearest unit by adding half the base */
                snprintf(buf, buf_size, "%" PRId64 "%c",
                         ((size + (base >> 1)) / base),
                         suffixes[i]);
                break;
            }
            base = base * 1024;
        }
    }
    return buf;
}
2256 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
2258 char buf1[128], date_buf[128], clock_buf[128];
2259 #ifdef _WIN32
2260 struct tm *ptm;
2261 #else
2262 struct tm tm;
2263 #endif
2264 time_t ti;
2265 int64_t secs;
2267 if (!sn) {
2268 snprintf(buf, buf_size,
2269 "%-10s%-20s%7s%20s%15s",
2270 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
2271 } else {
2272 ti = sn->date_sec;
2273 #ifdef _WIN32
2274 ptm = localtime(&ti);
2275 strftime(date_buf, sizeof(date_buf),
2276 "%Y-%m-%d %H:%M:%S", ptm);
2277 #else
2278 localtime_r(&ti, &tm);
2279 strftime(date_buf, sizeof(date_buf),
2280 "%Y-%m-%d %H:%M:%S", &tm);
2281 #endif
2282 secs = sn->vm_clock_nsec / 1000000000;
2283 snprintf(clock_buf, sizeof(clock_buf),
2284 "%02d:%02d:%02d.%03d",
2285 (int)(secs / 3600),
2286 (int)((secs / 60) % 60),
2287 (int)(secs % 60),
2288 (int)((sn->vm_clock_nsec / 1000000) % 1000));
2289 snprintf(buf, buf_size,
2290 "%-10s%-20s%7s%20s%15s",
2291 sn->id_str, sn->name,
2292 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
2293 date_buf,
2294 clock_buf);
2296 return buf;
2299 /**************************************************************/
2300 /* async I/Os */
2302 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
2303 QEMUIOVector *qiov, int nb_sectors,
2304 BlockDriverCompletionFunc *cb, void *opaque)
2306 trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
2308 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2309 cb, opaque, false);
2312 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
2313 QEMUIOVector *qiov, int nb_sectors,
2314 BlockDriverCompletionFunc *cb, void *opaque)
2316 trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
2318 return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors,
2319 cb, opaque, true);
2323 typedef struct MultiwriteCB {
2324 int error;
2325 int num_requests;
2326 int num_callbacks;
2327 struct {
2328 BlockDriverCompletionFunc *cb;
2329 void *opaque;
2330 QEMUIOVector *free_qiov;
2331 void *free_buf;
2332 } callbacks[];
2333 } MultiwriteCB;
2335 static void multiwrite_user_cb(MultiwriteCB *mcb)
2337 int i;
2339 for (i = 0; i < mcb->num_callbacks; i++) {
2340 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
2341 if (mcb->callbacks[i].free_qiov) {
2342 qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
2344 g_free(mcb->callbacks[i].free_qiov);
2345 qemu_vfree(mcb->callbacks[i].free_buf);
2349 static void multiwrite_cb(void *opaque, int ret)
2351 MultiwriteCB *mcb = opaque;
2353 trace_multiwrite_cb(mcb, ret);
2355 if (ret < 0 && !mcb->error) {
2356 mcb->error = ret;
2359 mcb->num_requests--;
2360 if (mcb->num_requests == 0) {
2361 multiwrite_user_cb(mcb);
2362 g_free(mcb);
2366 static int multiwrite_req_compare(const void *a, const void *b)
2368 const BlockRequest *req1 = a, *req2 = b;
2371 * Note that we can't simply subtract req2->sector from req1->sector
2372 * here as that could overflow the return value.
2374 if (req1->sector > req2->sector) {
2375 return 1;
2376 } else if (req1->sector < req2->sector) {
2377 return -1;
2378 } else {
2379 return 0;
2384 * Takes a bunch of requests and tries to merge them. Returns the number of
2385 * requests that remain after merging.
2387 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
2388 int num_reqs, MultiwriteCB *mcb)
2390 int i, outidx;
2392 // Sort requests by start sector
2393 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
2395 // Check if adjacent requests touch the same clusters. If so, combine them,
2396 // filling up gaps with zero sectors.
2397 outidx = 0;
2398 for (i = 1; i < num_reqs; i++) {
2399 int merge = 0;
2400 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
2402 // This handles the cases that are valid for all block drivers, namely
2403 // exactly sequential writes and overlapping writes.
2404 if (reqs[i].sector <= oldreq_last) {
2405 merge = 1;
2408 // The block driver may decide that it makes sense to combine requests
2409 // even if there is a gap of some sectors between them. In this case,
2410 // the gap is filled with zeros (therefore only applicable for yet
2411 // unused space in format like qcow2).
2412 if (!merge && bs->drv->bdrv_merge_requests) {
2413 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
2416 if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
2417 merge = 0;
2420 if (merge) {
2421 size_t size;
2422 QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
2423 qemu_iovec_init(qiov,
2424 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
2426 // Add the first request to the merged one. If the requests are
2427 // overlapping, drop the last sectors of the first request.
2428 size = (reqs[i].sector - reqs[outidx].sector) << 9;
2429 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
2431 // We might need to add some zeros between the two requests
2432 if (reqs[i].sector > oldreq_last) {
2433 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
2434 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
2435 memset(buf, 0, zero_bytes);
2436 qemu_iovec_add(qiov, buf, zero_bytes);
2437 mcb->callbacks[i].free_buf = buf;
2440 // Add the second request
2441 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
2443 reqs[outidx].nb_sectors = qiov->size >> 9;
2444 reqs[outidx].qiov = qiov;
2446 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
2447 } else {
2448 outidx++;
2449 reqs[outidx].sector = reqs[i].sector;
2450 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
2451 reqs[outidx].qiov = reqs[i].qiov;
2455 return outidx + 1;
2459 * Submit multiple AIO write requests at once.
2461 * On success, the function returns 0 and all requests in the reqs array have
2462 * been submitted. In error case this function returns -1, and any of the
2463 * requests may or may not be submitted yet. In particular, this means that the
2464 * callback will be called for some of the requests, for others it won't. The
2465 * caller must check the error field of the BlockRequest to wait for the right
2466 * callbacks (if error != 0, no callback will be called).
2468 * The implementation may modify the contents of the reqs array, e.g. to merge
2469 * requests. However, the fields opaque and error are left unmodified as they
2470 * are used to signal failure for a single request to the caller.
2472 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
2474 BlockDriverAIOCB *acb;
2475 MultiwriteCB *mcb;
2476 int i;
2478 /* don't submit writes if we don't have a medium */
2479 if (bs->drv == NULL) {
2480 for (i = 0; i < num_reqs; i++) {
2481 reqs[i].error = -ENOMEDIUM;
2483 return -1;
2486 if (num_reqs == 0) {
2487 return 0;
2490 // Create MultiwriteCB structure
2491 mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
2492 mcb->num_requests = 0;
2493 mcb->num_callbacks = num_reqs;
2495 for (i = 0; i < num_reqs; i++) {
2496 mcb->callbacks[i].cb = reqs[i].cb;
2497 mcb->callbacks[i].opaque = reqs[i].opaque;
2500 // Check for mergable requests
2501 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
2503 trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
2506 * Run the aio requests. As soon as one request can't be submitted
2507 * successfully, fail all requests that are not yet submitted (we must
2508 * return failure for all requests anyway)
2510 * num_requests cannot be set to the right value immediately: If
2511 * bdrv_aio_writev fails for some request, num_requests would be too high
2512 * and therefore multiwrite_cb() would never recognize the multiwrite
2513 * request as completed. We also cannot use the loop variable i to set it
2514 * when the first request fails because the callback may already have been
2515 * called for previously submitted requests. Thus, num_requests must be
2516 * incremented for each request that is submitted.
2518 * The problem that callbacks may be called early also means that we need
2519 * to take care that num_requests doesn't become 0 before all requests are
2520 * submitted - multiwrite_cb() would consider the multiwrite request
2521 * completed. A dummy request that is "completed" by a manual call to
2522 * multiwrite_cb() takes care of this.
2524 mcb->num_requests = 1;
2526 // Run the aio requests
2527 for (i = 0; i < num_reqs; i++) {
2528 mcb->num_requests++;
2529 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
2530 reqs[i].nb_sectors, multiwrite_cb, mcb);
2532 if (acb == NULL) {
2533 // We can only fail the whole thing if no request has been
2534 // submitted yet. Otherwise we'll wait for the submitted AIOs to
2535 // complete and report the error in the callback.
2536 if (i == 0) {
2537 trace_bdrv_aio_multiwrite_earlyfail(mcb);
2538 goto fail;
2539 } else {
2540 trace_bdrv_aio_multiwrite_latefail(mcb, i);
2541 multiwrite_cb(mcb, -EIO);
2542 break;
2547 /* Complete the dummy request */
2548 multiwrite_cb(mcb, 0);
2550 return 0;
2552 fail:
2553 for (i = 0; i < mcb->num_callbacks; i++) {
2554 reqs[i].error = -EIO;
2556 g_free(mcb);
2557 return -1;
2560 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
2562 acb->pool->cancel(acb);
2566 /**************************************************************/
2567 /* async block device emulation */
/* AIO request state for the synchronous (bounce-buffer) emulation path. */
typedef struct BlockDriverAIOCBSync {
    BlockDriverAIOCB common;    /* generic AIOCB header (pool, bs, cb, opaque) */
    QEMUBH *bh;                 /* bottom half that delivers the completion */
    int ret;                    /* result of the synchronous read/write */
    /* vector translation state */
    QEMUIOVector *qiov;         /* caller's scatter/gather list */
    uint8_t *bounce;            /* linear bounce buffer of qiov->size bytes */
    int is_write;               /* non-zero for writes */
} BlockDriverAIOCBSync;
2579 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
2581 BlockDriverAIOCBSync *acb =
2582 container_of(blockacb, BlockDriverAIOCBSync, common);
2583 qemu_bh_delete(acb->bh);
2584 acb->bh = NULL;
2585 qemu_aio_release(acb);
2588 static AIOPool bdrv_em_aio_pool = {
2589 .aiocb_size = sizeof(BlockDriverAIOCBSync),
2590 .cancel = bdrv_aio_cancel_em,
2593 static void bdrv_aio_bh_cb(void *opaque)
2595 BlockDriverAIOCBSync *acb = opaque;
2597 if (!acb->is_write)
2598 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
2599 qemu_vfree(acb->bounce);
2600 acb->common.cb(acb->common.opaque, acb->ret);
2601 qemu_bh_delete(acb->bh);
2602 acb->bh = NULL;
2603 qemu_aio_release(acb);
2606 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
2607 int64_t sector_num,
2608 QEMUIOVector *qiov,
2609 int nb_sectors,
2610 BlockDriverCompletionFunc *cb,
2611 void *opaque,
2612 int is_write)
2615 BlockDriverAIOCBSync *acb;
2617 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
2618 acb->is_write = is_write;
2619 acb->qiov = qiov;
2620 acb->bounce = qemu_blockalign(bs, qiov->size);
2622 if (!acb->bh)
2623 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
2625 if (is_write) {
2626 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
2627 acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
2628 } else {
2629 acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
2632 qemu_bh_schedule(acb->bh);
2634 return &acb->common;
2637 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
2638 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2639 BlockDriverCompletionFunc *cb, void *opaque)
2641 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
2644 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
2645 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
2646 BlockDriverCompletionFunc *cb, void *opaque)
2648 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
/* AIO request state for the coroutine-based emulation path. */
typedef struct BlockDriverAIOCBCoroutine {
    BlockDriverAIOCB common;  /* generic AIOCB header (pool, bs, cb, opaque) */
    BlockRequest req;         /* sector/nb_sectors/qiov plus error result */
    bool is_write;            /* direction of the request */
    QEMUBH* bh;               /* bottom half that delivers the completion */
} BlockDriverAIOCBCoroutine;
2659 static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb)
2661 qemu_aio_flush();
2664 static AIOPool bdrv_em_co_aio_pool = {
2665 .aiocb_size = sizeof(BlockDriverAIOCBCoroutine),
2666 .cancel = bdrv_aio_co_cancel_em,
2669 static void bdrv_co_em_bh(void *opaque)
2671 BlockDriverAIOCBCoroutine *acb = opaque;
2673 acb->common.cb(acb->common.opaque, acb->req.error);
2674 qemu_bh_delete(acb->bh);
2675 qemu_aio_release(acb);
2678 /* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
2679 static void coroutine_fn bdrv_co_do_rw(void *opaque)
2681 BlockDriverAIOCBCoroutine *acb = opaque;
2682 BlockDriverState *bs = acb->common.bs;
2684 if (!acb->is_write) {
2685 acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
2686 acb->req.nb_sectors, acb->req.qiov);
2687 } else {
2688 acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
2689 acb->req.nb_sectors, acb->req.qiov);
2692 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2693 qemu_bh_schedule(acb->bh);
2696 static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
2697 int64_t sector_num,
2698 QEMUIOVector *qiov,
2699 int nb_sectors,
2700 BlockDriverCompletionFunc *cb,
2701 void *opaque,
2702 bool is_write)
2704 Coroutine *co;
2705 BlockDriverAIOCBCoroutine *acb;
2707 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2708 acb->req.sector = sector_num;
2709 acb->req.nb_sectors = nb_sectors;
2710 acb->req.qiov = qiov;
2711 acb->is_write = is_write;
2713 co = qemu_coroutine_create(bdrv_co_do_rw);
2714 qemu_coroutine_enter(co, acb);
2716 return &acb->common;
2719 static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
2721 BlockDriverAIOCBCoroutine *acb = opaque;
2722 BlockDriverState *bs = acb->common.bs;
2724 acb->req.error = bdrv_co_flush(bs);
2725 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2726 qemu_bh_schedule(acb->bh);
2729 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
2730 BlockDriverCompletionFunc *cb, void *opaque)
2732 trace_bdrv_aio_flush(bs, opaque);
2734 Coroutine *co;
2735 BlockDriverAIOCBCoroutine *acb;
2737 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2738 co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
2739 qemu_coroutine_enter(co, acb);
2741 return &acb->common;
2744 static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
2746 BlockDriverAIOCBCoroutine *acb = opaque;
2747 BlockDriverState *bs = acb->common.bs;
2749 acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
2750 acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
2751 qemu_bh_schedule(acb->bh);
2754 BlockDriverAIOCB *bdrv_aio_discard(BlockDriverState *bs,
2755 int64_t sector_num, int nb_sectors,
2756 BlockDriverCompletionFunc *cb, void *opaque)
2758 Coroutine *co;
2759 BlockDriverAIOCBCoroutine *acb;
2761 trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
2763 acb = qemu_aio_get(&bdrv_em_co_aio_pool, bs, cb, opaque);
2764 acb->req.sector = sector_num;
2765 acb->req.nb_sectors = nb_sectors;
2766 co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
2767 qemu_coroutine_enter(co, acb);
2769 return &acb->common;
2772 void bdrv_init(void)
2774 module_call_init(MODULE_INIT_BLOCK);
2777 void bdrv_init_with_whitelist(void)
2779 use_bdrv_whitelist = 1;
2780 bdrv_init();
2783 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
2784 BlockDriverCompletionFunc *cb, void *opaque)
2786 BlockDriverAIOCB *acb;
2788 if (pool->free_aiocb) {
2789 acb = pool->free_aiocb;
2790 pool->free_aiocb = acb->next;
2791 } else {
2792 acb = g_malloc0(pool->aiocb_size);
2793 acb->pool = pool;
2795 acb->bs = bs;
2796 acb->cb = cb;
2797 acb->opaque = opaque;
2798 return acb;
2801 void qemu_aio_release(void *p)
2803 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
2804 AIOPool *pool = acb->pool;
2805 acb->next = pool->free_aiocb;
2806 pool->free_aiocb = acb;
2809 /**************************************************************/
2810 /* Coroutine block device emulation */
/* Rendezvous between an AIO completion callback and a waiting coroutine. */
typedef struct CoroutineIOCompletion {
    Coroutine *coroutine;  /* coroutine to re-enter when the AIO completes */
    int ret;               /* result handed to the completion callback */
} CoroutineIOCompletion;
2817 static void bdrv_co_io_em_complete(void *opaque, int ret)
2819 CoroutineIOCompletion *co = opaque;
2821 co->ret = ret;
2822 qemu_coroutine_enter(co->coroutine, NULL);
2825 static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
2826 int nb_sectors, QEMUIOVector *iov,
2827 bool is_write)
2829 CoroutineIOCompletion co = {
2830 .coroutine = qemu_coroutine_self(),
2832 BlockDriverAIOCB *acb;
2834 if (is_write) {
2835 acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
2836 bdrv_co_io_em_complete, &co);
2837 } else {
2838 acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
2839 bdrv_co_io_em_complete, &co);
2842 trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
2843 if (!acb) {
2844 return -EIO;
2846 qemu_coroutine_yield();
2848 return co.ret;
2851 static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
2852 int64_t sector_num, int nb_sectors,
2853 QEMUIOVector *iov)
2855 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
2858 static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
2859 int64_t sector_num, int nb_sectors,
2860 QEMUIOVector *iov)
2862 return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
2865 static void coroutine_fn bdrv_flush_co_entry(void *opaque)
2867 RwCo *rwco = opaque;
2869 rwco->ret = bdrv_co_flush(rwco->bs);
2872 int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
2874 if (bs->open_flags & BDRV_O_NO_FLUSH) {
2875 return 0;
2876 } else if (!bs->drv) {
2877 return 0;
2878 } else if (bs->drv->bdrv_co_flush) {
2879 return bs->drv->bdrv_co_flush(bs);
2880 } else if (bs->drv->bdrv_aio_flush) {
2881 BlockDriverAIOCB *acb;
2882 CoroutineIOCompletion co = {
2883 .coroutine = qemu_coroutine_self(),
2886 acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
2887 if (acb == NULL) {
2888 return -EIO;
2889 } else {
2890 qemu_coroutine_yield();
2891 return co.ret;
2893 } else {
2895 * Some block drivers always operate in either writethrough or unsafe
2896 * mode and don't support bdrv_flush therefore. Usually qemu doesn't
2897 * know how the server works (because the behaviour is hardcoded or
2898 * depends on server-side configuration), so we can't ensure that
2899 * everything is safe on disk. Returning an error doesn't work because
2900 * that would break guests even if the server operates in writethrough
2901 * mode.
2903 * Let's hope the user knows what he's doing.
2905 return 0;
2909 int bdrv_flush(BlockDriverState *bs)
2911 Coroutine *co;
2912 RwCo rwco = {
2913 .bs = bs,
2914 .ret = NOT_DONE,
2917 if (qemu_in_coroutine()) {
2918 /* Fast-path if already in coroutine context */
2919 bdrv_flush_co_entry(&rwco);
2920 } else {
2921 co = qemu_coroutine_create(bdrv_flush_co_entry);
2922 qemu_coroutine_enter(co, &rwco);
2923 while (rwco.ret == NOT_DONE) {
2924 qemu_aio_wait();
2928 return rwco.ret;
2931 static void coroutine_fn bdrv_discard_co_entry(void *opaque)
2933 RwCo *rwco = opaque;
2935 rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
2938 int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
2939 int nb_sectors)
2941 if (!bs->drv) {
2942 return -ENOMEDIUM;
2943 } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
2944 return -EIO;
2945 } else if (bs->read_only) {
2946 return -EROFS;
2947 } else if (bs->drv->bdrv_co_discard) {
2948 return bs->drv->bdrv_co_discard(bs, sector_num, nb_sectors);
2949 } else if (bs->drv->bdrv_aio_discard) {
2950 BlockDriverAIOCB *acb;
2951 CoroutineIOCompletion co = {
2952 .coroutine = qemu_coroutine_self(),
2955 acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
2956 bdrv_co_io_em_complete, &co);
2957 if (acb == NULL) {
2958 return -EIO;
2959 } else {
2960 qemu_coroutine_yield();
2961 return co.ret;
2963 } else {
2964 return 0;
2968 int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
2970 Coroutine *co;
2971 RwCo rwco = {
2972 .bs = bs,
2973 .sector_num = sector_num,
2974 .nb_sectors = nb_sectors,
2975 .ret = NOT_DONE,
2978 if (qemu_in_coroutine()) {
2979 /* Fast-path if already in coroutine context */
2980 bdrv_discard_co_entry(&rwco);
2981 } else {
2982 co = qemu_coroutine_create(bdrv_discard_co_entry);
2983 qemu_coroutine_enter(co, &rwco);
2984 while (rwco.ret == NOT_DONE) {
2985 qemu_aio_wait();
2989 return rwco.ret;
2992 /**************************************************************/
2993 /* removable device support */
2996 * Return TRUE if the media is present
2998 int bdrv_is_inserted(BlockDriverState *bs)
3000 BlockDriver *drv = bs->drv;
3002 if (!drv)
3003 return 0;
3004 if (!drv->bdrv_is_inserted)
3005 return 1;
3006 return drv->bdrv_is_inserted(bs);
3010 * Return whether the media changed since the last call to this
3011 * function, or -ENOTSUP if we don't know. Most drivers don't know.
3013 int bdrv_media_changed(BlockDriverState *bs)
3015 BlockDriver *drv = bs->drv;
3017 if (drv && drv->bdrv_media_changed) {
3018 return drv->bdrv_media_changed(bs);
3020 return -ENOTSUP;
3024 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
3026 void bdrv_eject(BlockDriverState *bs, int eject_flag)
3028 BlockDriver *drv = bs->drv;
3030 if (drv && drv->bdrv_eject) {
3031 drv->bdrv_eject(bs, eject_flag);
3036 * Lock or unlock the media (if it is locked, the user won't be able
3037 * to eject it manually).
3039 void bdrv_lock_medium(BlockDriverState *bs, bool locked)
3041 BlockDriver *drv = bs->drv;
3043 trace_bdrv_lock_medium(bs, locked);
3045 if (drv && drv->bdrv_lock_medium) {
3046 drv->bdrv_lock_medium(bs, locked);
3050 /* needed for generic scsi interface */
3052 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
3054 BlockDriver *drv = bs->drv;
3056 if (drv && drv->bdrv_ioctl)
3057 return drv->bdrv_ioctl(bs, req, buf);
3058 return -ENOTSUP;
3061 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
3062 unsigned long int req, void *buf,
3063 BlockDriverCompletionFunc *cb, void *opaque)
3065 BlockDriver *drv = bs->drv;
3067 if (drv && drv->bdrv_aio_ioctl)
3068 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
3069 return NULL;
3072 void bdrv_set_buffer_alignment(BlockDriverState *bs, int align)
3074 bs->buffer_alignment = align;
3077 void *qemu_blockalign(BlockDriverState *bs, size_t size)
3079 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);
3082 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable)
3084 int64_t bitmap_size;
3086 bs->dirty_count = 0;
3087 if (enable) {
3088 if (!bs->dirty_bitmap) {
3089 bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS) +
3090 BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
3091 bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
3093 bs->dirty_bitmap = g_malloc0(bitmap_size);
3095 } else {
3096 if (bs->dirty_bitmap) {
3097 g_free(bs->dirty_bitmap);
3098 bs->dirty_bitmap = NULL;
3103 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
3105 int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
3107 if (bs->dirty_bitmap &&
3108 (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
3109 return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
3110 (1UL << (chunk % (sizeof(unsigned long) * 8))));
3111 } else {
3112 return 0;
3116 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
3117 int nr_sectors)
3119 set_dirty_bitmap(bs, cur_sector, nr_sectors, 0);
3122 int64_t bdrv_get_dirty_count(BlockDriverState *bs)
3124 return bs->dirty_count;
3127 void bdrv_set_in_use(BlockDriverState *bs, int in_use)
3129 assert(bs->in_use != in_use);
3130 bs->in_use = in_use;
3133 int bdrv_in_use(BlockDriverState *bs)
3135 return bs->in_use;
3138 void bdrv_iostatus_enable(BlockDriverState *bs)
3140 bs->iostatus = BDRV_IOS_OK;
3143 /* The I/O status is only enabled if the drive explicitly
3144 * enables it _and_ the VM is configured to stop on errors */
3145 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
3147 return (bs->iostatus != BDRV_IOS_INVAL &&
3148 (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
3149 bs->on_write_error == BLOCK_ERR_STOP_ANY ||
3150 bs->on_read_error == BLOCK_ERR_STOP_ANY));
3153 void bdrv_iostatus_disable(BlockDriverState *bs)
3155 bs->iostatus = BDRV_IOS_INVAL;
3158 void bdrv_iostatus_reset(BlockDriverState *bs)
3160 if (bdrv_iostatus_is_enabled(bs)) {
3161 bs->iostatus = BDRV_IOS_OK;
3165 /* XXX: Today this is set by device models because it makes the implementation
3166 quite simple. However, the block layer knows about the error, so it's
3167 possible to implement this without device models being involved */
3168 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
3170 if (bdrv_iostatus_is_enabled(bs) && bs->iostatus == BDRV_IOS_OK) {
3171 assert(error >= 0);
3172 bs->iostatus = error == ENOSPC ? BDRV_IOS_ENOSPC : BDRV_IOS_FAILED;
3176 void
3177 bdrv_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie, int64_t bytes,
3178 enum BlockAcctType type)
3180 assert(type < BDRV_MAX_IOTYPE);
3182 cookie->bytes = bytes;
3183 cookie->start_time_ns = get_clock();
3184 cookie->type = type;
3187 void
3188 bdrv_acct_done(BlockDriverState *bs, BlockAcctCookie *cookie)
3190 assert(cookie->type < BDRV_MAX_IOTYPE);
3192 bs->nr_bytes[cookie->type] += cookie->bytes;
3193 bs->nr_ops[cookie->type]++;
3194 bs->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
3197 int bdrv_img_create(const char *filename, const char *fmt,
3198 const char *base_filename, const char *base_fmt,
3199 char *options, uint64_t img_size, int flags)
3201 QEMUOptionParameter *param = NULL, *create_options = NULL;
3202 QEMUOptionParameter *backing_fmt, *backing_file, *size;
3203 BlockDriverState *bs = NULL;
3204 BlockDriver *drv, *proto_drv;
3205 BlockDriver *backing_drv = NULL;
3206 int ret = 0;
3208 /* Find driver and parse its options */
3209 drv = bdrv_find_format(fmt);
3210 if (!drv) {
3211 error_report("Unknown file format '%s'", fmt);
3212 ret = -EINVAL;
3213 goto out;
3216 proto_drv = bdrv_find_protocol(filename);
3217 if (!proto_drv) {
3218 error_report("Unknown protocol '%s'", filename);
3219 ret = -EINVAL;
3220 goto out;
3223 create_options = append_option_parameters(create_options,
3224 drv->create_options);
3225 create_options = append_option_parameters(create_options,
3226 proto_drv->create_options);
3228 /* Create parameter list with default values */
3229 param = parse_option_parameters("", create_options, param);
3231 set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size);
3233 /* Parse -o options */
3234 if (options) {
3235 param = parse_option_parameters(options, create_options, param);
3236 if (param == NULL) {
3237 error_report("Invalid options for file format '%s'.", fmt);
3238 ret = -EINVAL;
3239 goto out;
3243 if (base_filename) {
3244 if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE,
3245 base_filename)) {
3246 error_report("Backing file not supported for file format '%s'",
3247 fmt);
3248 ret = -EINVAL;
3249 goto out;
3253 if (base_fmt) {
3254 if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) {
3255 error_report("Backing file format not supported for file "
3256 "format '%s'", fmt);
3257 ret = -EINVAL;
3258 goto out;
3262 backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE);
3263 if (backing_file && backing_file->value.s) {
3264 if (!strcmp(filename, backing_file->value.s)) {
3265 error_report("Error: Trying to create an image with the "
3266 "same filename as the backing file");
3267 ret = -EINVAL;
3268 goto out;
3272 backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT);
3273 if (backing_fmt && backing_fmt->value.s) {
3274 backing_drv = bdrv_find_format(backing_fmt->value.s);
3275 if (!backing_drv) {
3276 error_report("Unknown backing file format '%s'",
3277 backing_fmt->value.s);
3278 ret = -EINVAL;
3279 goto out;
3283 // The size for the image must always be specified, with one exception:
3284 // If we are using a backing file, we can obtain the size from there
3285 size = get_option_parameter(param, BLOCK_OPT_SIZE);
3286 if (size && size->value.n == -1) {
3287 if (backing_file && backing_file->value.s) {
3288 uint64_t size;
3289 char buf[32];
3291 bs = bdrv_new("");
3293 ret = bdrv_open(bs, backing_file->value.s, flags, backing_drv);
3294 if (ret < 0) {
3295 error_report("Could not open '%s'", backing_file->value.s);
3296 goto out;
3298 bdrv_get_geometry(bs, &size);
3299 size *= 512;
3301 snprintf(buf, sizeof(buf), "%" PRId64, size);
3302 set_option_parameter(param, BLOCK_OPT_SIZE, buf);
3303 } else {
3304 error_report("Image creation needs a size parameter");
3305 ret = -EINVAL;
3306 goto out;
3310 printf("Formatting '%s', fmt=%s ", filename, fmt);
3311 print_option_parameters(param);
3312 puts("");
3314 ret = bdrv_create(drv, filename, param);
3316 if (ret < 0) {
3317 if (ret == -ENOTSUP) {
3318 error_report("Formatting or formatting option not supported for "
3319 "file format '%s'", fmt);
3320 } else if (ret == -EFBIG) {
3321 error_report("The image size is too large for file format '%s'",
3322 fmt);
3323 } else {
3324 error_report("%s: error while creating %s: %s", filename, fmt,
3325 strerror(-ret));
3329 out:
3330 free_option_parameters(create_options);
3331 free_option_parameters(param);
3333 if (bs) {
3334 bdrv_delete(bs);
3337 return ret;