virtio-blk: Pass read errors to the guest
[qemu.git] / block.c
blob1e49bc05cca4c4355eec6d05e8999c814033490a
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "config-host.h"
25 #include "qemu-common.h"
26 #include "monitor.h"
27 #include "block_int.h"
28 #include "module.h"
30 #ifdef CONFIG_BSD
31 #include <sys/types.h>
32 #include <sys/stat.h>
33 #include <sys/ioctl.h>
34 #include <sys/queue.h>
35 #ifndef __DragonFly__
36 #include <sys/disk.h>
37 #endif
38 #endif
40 #ifdef _WIN32
41 #include <windows.h>
42 #endif
44 #define SECTOR_BITS 9
45 #define SECTOR_SIZE (1 << SECTOR_BITS)
47 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
48 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
49 BlockDriverCompletionFunc *cb, void *opaque);
50 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
51 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
52 BlockDriverCompletionFunc *cb, void *opaque);
53 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
54 BlockDriverCompletionFunc *cb, void *opaque);
55 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
56 uint8_t *buf, int nb_sectors);
57 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
58 const uint8_t *buf, int nb_sectors);
60 BlockDriverState *bdrv_first;
62 static BlockDriver *first_drv;
64 /* If non-zero, use only whitelisted block drivers */
65 static int use_bdrv_whitelist;
/*
 * Return non-zero if 'path' is absolute.
 *
 * Everything up to and including the first ':' is skipped (treated as a
 * protocol prefix); the remainder is absolute when it starts with '/'
 * (or with '\\' on Windows).
 */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *rest;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (*path == '/' || *path == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    rest = colon ? colon + 1 : path;
#ifdef _WIN32
    return (*rest == '/' || *rest == '\\');
#else
    return (*rest == '/');
#endif
}
/*
 * Build in 'dest' (capacity 'dest_size' bytes) the path of 'filename'.
 *
 * An absolute 'filename' is copied as is. Otherwise it is appended to the
 * directory part of 'base_path', i.e. everything up to and including the
 * last path separator or, failing that, the protocol prefix ending in ':'.
 * URLs are supported. Nothing is written when dest_size <= 0.
 */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *prefix_end;
    const char *last_sep;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* end of the "proto:" prefix, or the start of the string */
    prefix_end = strchr(base_path, ':');
    prefix_end = prefix_end ? prefix_end + 1 : base_path;

    last_sep = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *last_bslash = strrchr(base_path, '\\');

        if (!last_sep || last_bslash > last_sep) {
            last_sep = last_bslash;
        }
    }
#endif
    last_sep = last_sep ? last_sep + 1 : base_path;

    /* keep whichever boundary lies further into base_path */
    if (last_sep > prefix_end) {
        prefix_end = last_sep;
    }

    prefix_len = prefix_end - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
131 void bdrv_register(BlockDriver *bdrv)
133 if (!bdrv->bdrv_aio_readv) {
134 /* add AIO emulation layer */
135 bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
136 bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
137 } else if (!bdrv->bdrv_read) {
138 /* add synchronous IO emulation layer */
139 bdrv->bdrv_read = bdrv_read_em;
140 bdrv->bdrv_write = bdrv_write_em;
143 if (!bdrv->bdrv_aio_flush)
144 bdrv->bdrv_aio_flush = bdrv_aio_flush_em;
146 bdrv->next = first_drv;
147 first_drv = bdrv;
150 /* create a new block device (by default it is empty) */
151 BlockDriverState *bdrv_new(const char *device_name)
153 BlockDriverState **pbs, *bs;
155 bs = qemu_mallocz(sizeof(BlockDriverState));
156 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
157 if (device_name[0] != '\0') {
158 /* insert at the end */
159 pbs = &bdrv_first;
160 while (*pbs != NULL)
161 pbs = &(*pbs)->next;
162 *pbs = bs;
164 return bs;
167 BlockDriver *bdrv_find_format(const char *format_name)
169 BlockDriver *drv1;
170 for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
171 if (!strcmp(drv1->format_name, format_name))
172 return drv1;
174 return NULL;
177 static int bdrv_is_whitelisted(BlockDriver *drv)
179 static const char *whitelist[] = {
180 CONFIG_BDRV_WHITELIST
182 const char **p;
184 if (!whitelist[0])
185 return 1; /* no whitelist, anything goes */
187 for (p = whitelist; *p; p++) {
188 if (!strcmp(drv->format_name, *p)) {
189 return 1;
192 return 0;
195 BlockDriver *bdrv_find_whitelisted_format(const char *format_name)
197 BlockDriver *drv = bdrv_find_format(format_name);
198 return drv && bdrv_is_whitelisted(drv) ? drv : NULL;
201 int bdrv_create(BlockDriver *drv, const char* filename,
202 QEMUOptionParameter *options)
204 if (!drv->bdrv_create)
205 return -ENOTSUP;
207 return drv->bdrv_create(filename, options);
/*
 * Store the name of a fresh temporary file in 'filename'.
 *
 * Windows: the name comes from GetTempFileName(); 'size' is not consulted.
 * Elsewhere: the file is created with mkstemp() under $TMPDIR (default
 * "/tmp") and its descriptor is closed again straight away; 'size' is the
 * capacity of 'filename'.
 */
#ifdef _WIN32
void get_tmp_filename(char *filename, int size)
{
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
}
#else
void get_tmp_filename(char *filename, int size)
{
    int fd;
    const char *tmpdir;

    /* XXX: race condition possible */
    tmpdir = getenv("TMPDIR");
    if (!tmpdir) {
        tmpdir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", tmpdir);
    fd = mkstemp(filename);
    /* mkstemp() returns -1 on failure; never hand that to close(). */
    if (fd >= 0) {
        close(fd);
    }
}
#endif
#ifdef _WIN32
/* Return non-zero if 'filename' starts with an ASCII letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    char letter = filename[0];
    int is_lower = (letter >= 'a' && letter <= 'z');
    int is_upper = (letter >= 'A' && letter <= 'Z');

    return ((is_lower || is_upper) && filename[1] == ':');
}

/*
 * Return 1 if 'filename' is exactly a drive specification ("d:") or starts
 * with "\\.\" or "//./", 0 otherwise.
 */
int is_windows_drive(const char *filename)
{
    if (is_windows_drive_prefix(filename) && filename[2] == '\0') {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL) ||
        strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
#endif
253 static BlockDriver *find_protocol(const char *filename)
255 BlockDriver *drv1;
256 char protocol[128];
257 int len;
258 const char *p;
260 #ifdef _WIN32
261 if (is_windows_drive(filename) ||
262 is_windows_drive_prefix(filename))
263 return bdrv_find_format("raw");
264 #endif
265 p = strchr(filename, ':');
266 if (!p)
267 return bdrv_find_format("raw");
268 len = p - filename;
269 if (len > sizeof(protocol) - 1)
270 len = sizeof(protocol) - 1;
271 memcpy(protocol, filename, len);
272 protocol[len] = '\0';
273 for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
274 if (drv1->protocol_name &&
275 !strcmp(drv1->protocol_name, protocol))
276 return drv1;
278 return NULL;
282 * Detect host devices. By convention, /dev/cdrom[N] is always
283 * recognized as a host CDROM.
285 static BlockDriver *find_hdev_driver(const char *filename)
287 int score_max = 0, score;
288 BlockDriver *drv = NULL, *d;
290 for (d = first_drv; d; d = d->next) {
291 if (d->bdrv_probe_device) {
292 score = d->bdrv_probe_device(filename);
293 if (score > score_max) {
294 score_max = score;
295 drv = d;
300 return drv;
303 static BlockDriver *find_image_format(const char *filename)
305 int ret, score, score_max;
306 BlockDriver *drv1, *drv;
307 uint8_t buf[2048];
308 BlockDriverState *bs;
310 drv = find_protocol(filename);
311 /* no need to test disk image formats for vvfat */
312 if (drv && strcmp(drv->format_name, "vvfat") == 0)
313 return drv;
315 ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
316 if (ret < 0)
317 return NULL;
318 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
319 bdrv_delete(bs);
320 if (ret < 0) {
321 return NULL;
324 score_max = 0;
325 for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
326 if (drv1->bdrv_probe) {
327 score = drv1->bdrv_probe(buf, ret, filename);
328 if (score > score_max) {
329 score_max = score;
330 drv = drv1;
334 return drv;
337 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
339 BlockDriverState *bs;
340 int ret;
342 bs = bdrv_new("");
343 ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
344 if (ret < 0) {
345 bdrv_delete(bs);
346 return ret;
348 bs->growable = 1;
349 *pbs = bs;
350 return 0;
353 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
355 return bdrv_open2(bs, filename, flags, NULL);
358 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
359 BlockDriver *drv)
361 int ret, open_flags, try_rw;
362 char tmp_filename[PATH_MAX];
363 char backing_filename[PATH_MAX];
365 bs->is_temporary = 0;
366 bs->encrypted = 0;
367 bs->valid_key = 0;
368 /* buffer_alignment defaulted to 512, drivers can change this value */
369 bs->buffer_alignment = 512;
371 if (flags & BDRV_O_SNAPSHOT) {
372 BlockDriverState *bs1;
373 int64_t total_size;
374 int is_protocol = 0;
375 BlockDriver *bdrv_qcow2;
376 QEMUOptionParameter *options;
378 /* if snapshot, we create a temporary backing file and open it
379 instead of opening 'filename' directly */
381 /* if there is a backing file, use it */
382 bs1 = bdrv_new("");
383 ret = bdrv_open2(bs1, filename, 0, drv);
384 if (ret < 0) {
385 bdrv_delete(bs1);
386 return ret;
388 total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
390 if (bs1->drv && bs1->drv->protocol_name)
391 is_protocol = 1;
393 bdrv_delete(bs1);
395 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
397 /* Real path is meaningless for protocols */
398 if (is_protocol)
399 snprintf(backing_filename, sizeof(backing_filename),
400 "%s", filename);
401 else
402 realpath(filename, backing_filename);
404 bdrv_qcow2 = bdrv_find_format("qcow2");
405 options = parse_option_parameters("", bdrv_qcow2->create_options, NULL);
407 set_option_parameter_int(options, BLOCK_OPT_SIZE, total_size * 512);
408 set_option_parameter(options, BLOCK_OPT_BACKING_FILE, backing_filename);
409 if (drv) {
410 set_option_parameter(options, BLOCK_OPT_BACKING_FMT,
411 drv->format_name);
414 ret = bdrv_create(bdrv_qcow2, tmp_filename, options);
415 if (ret < 0) {
416 return ret;
419 filename = tmp_filename;
420 drv = bdrv_qcow2;
421 bs->is_temporary = 1;
424 pstrcpy(bs->filename, sizeof(bs->filename), filename);
425 if (flags & BDRV_O_FILE) {
426 drv = find_protocol(filename);
427 } else if (!drv) {
428 drv = find_hdev_driver(filename);
429 if (!drv) {
430 drv = find_image_format(filename);
433 if (!drv) {
434 ret = -ENOENT;
435 goto unlink_and_fail;
437 bs->drv = drv;
438 bs->opaque = qemu_mallocz(drv->instance_size);
441 * Yes, BDRV_O_NOCACHE aka O_DIRECT means we have to present a
442 * write cache to the guest. We do need the fdatasync to flush
443 * out transactions for block allocations, and we maybe have a
444 * volatile write cache in our backing device to deal with.
446 if (flags & (BDRV_O_CACHE_WB|BDRV_O_NOCACHE))
447 bs->enable_write_cache = 1;
449 /* Note: for compatibility, we open disk image files as RDWR, and
450 RDONLY as fallback */
451 try_rw = !bs->read_only || bs->is_temporary;
452 if (!(flags & BDRV_O_FILE))
453 open_flags = (try_rw ? BDRV_O_RDWR : 0) |
454 (flags & (BDRV_O_CACHE_MASK|BDRV_O_NATIVE_AIO));
455 else
456 open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
457 if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv))
458 ret = -ENOTSUP;
459 else
460 ret = drv->bdrv_open(bs, filename, open_flags);
461 if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
462 ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
463 bs->read_only = 1;
465 if (ret < 0) {
466 qemu_free(bs->opaque);
467 bs->opaque = NULL;
468 bs->drv = NULL;
469 unlink_and_fail:
470 if (bs->is_temporary)
471 unlink(filename);
472 return ret;
474 if (drv->bdrv_getlength) {
475 bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
477 #ifndef _WIN32
478 if (bs->is_temporary) {
479 unlink(filename);
481 #endif
482 if (bs->backing_file[0] != '\0') {
483 /* if there is a backing file, use it */
484 BlockDriver *back_drv = NULL;
485 bs->backing_hd = bdrv_new("");
486 /* pass on read_only property to the backing_hd */
487 bs->backing_hd->read_only = bs->read_only;
488 path_combine(backing_filename, sizeof(backing_filename),
489 filename, bs->backing_file);
490 if (bs->backing_format[0] != '\0')
491 back_drv = bdrv_find_format(bs->backing_format);
492 ret = bdrv_open2(bs->backing_hd, backing_filename, open_flags,
493 back_drv);
494 if (ret < 0) {
495 bdrv_close(bs);
496 return ret;
500 if (!bdrv_key_required(bs)) {
501 /* call the change callback */
502 bs->media_changed = 1;
503 if (bs->change_cb)
504 bs->change_cb(bs->change_opaque);
506 return 0;
509 void bdrv_close(BlockDriverState *bs)
511 if (bs->drv) {
512 if (bs->backing_hd)
513 bdrv_delete(bs->backing_hd);
514 bs->drv->bdrv_close(bs);
515 qemu_free(bs->opaque);
516 #ifdef _WIN32
517 if (bs->is_temporary) {
518 unlink(bs->filename);
520 #endif
521 bs->opaque = NULL;
522 bs->drv = NULL;
524 /* call the change callback */
525 bs->media_changed = 1;
526 if (bs->change_cb)
527 bs->change_cb(bs->change_opaque);
531 void bdrv_delete(BlockDriverState *bs)
533 BlockDriverState **pbs;
535 pbs = &bdrv_first;
536 while (*pbs != bs && *pbs != NULL)
537 pbs = &(*pbs)->next;
538 if (*pbs == bs)
539 *pbs = bs->next;
541 bdrv_close(bs);
542 qemu_free(bs);
546 * Run consistency checks on an image
548 * Returns the number of errors or -errno when an internal error occurs
550 int bdrv_check(BlockDriverState *bs)
552 if (bs->drv->bdrv_check == NULL) {
553 return -ENOTSUP;
556 return bs->drv->bdrv_check(bs);
559 /* commit COW file into the raw image */
560 int bdrv_commit(BlockDriverState *bs)
562 BlockDriver *drv = bs->drv;
563 int64_t i, total_sectors;
564 int n, j;
565 unsigned char sector[512];
567 if (!drv)
568 return -ENOMEDIUM;
570 if (bs->read_only) {
571 return -EACCES;
574 if (!bs->backing_hd) {
575 return -ENOTSUP;
578 total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
579 for (i = 0; i < total_sectors;) {
580 if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
581 for(j = 0; j < n; j++) {
582 if (bdrv_read(bs, i, sector, 1) != 0) {
583 return -EIO;
586 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
587 return -EIO;
589 i++;
591 } else {
592 i += n;
596 if (drv->bdrv_make_empty)
597 return drv->bdrv_make_empty(bs);
599 return 0;
602 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
603 size_t size)
605 int64_t len;
607 if (!bdrv_is_inserted(bs))
608 return -ENOMEDIUM;
610 if (bs->growable)
611 return 0;
613 len = bdrv_getlength(bs);
615 if (offset < 0)
616 return -EIO;
618 if ((offset > len) || (len - offset < size))
619 return -EIO;
621 return 0;
624 static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
625 int nb_sectors)
627 return bdrv_check_byte_request(bs, sector_num * 512, nb_sectors * 512);
630 /* return < 0 if error. See bdrv_write() for the return codes */
631 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
632 uint8_t *buf, int nb_sectors)
634 BlockDriver *drv = bs->drv;
636 if (!drv)
637 return -ENOMEDIUM;
638 if (bdrv_check_request(bs, sector_num, nb_sectors))
639 return -EIO;
641 return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
644 /* Return < 0 if error. Important errors are:
645 -EIO generic I/O error (may happen for all errors)
646 -ENOMEDIUM No media inserted.
647 -EINVAL Invalid sector number or nb_sectors
648 -EACCES Trying to write a read-only device
650 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
651 const uint8_t *buf, int nb_sectors)
653 BlockDriver *drv = bs->drv;
654 if (!bs->drv)
655 return -ENOMEDIUM;
656 if (bs->read_only)
657 return -EACCES;
658 if (bdrv_check_request(bs, sector_num, nb_sectors))
659 return -EIO;
661 return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
664 int bdrv_pread(BlockDriverState *bs, int64_t offset,
665 void *buf, int count1)
667 uint8_t tmp_buf[SECTOR_SIZE];
668 int len, nb_sectors, count;
669 int64_t sector_num;
671 count = count1;
672 /* first read to align to sector start */
673 len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
674 if (len > count)
675 len = count;
676 sector_num = offset >> SECTOR_BITS;
677 if (len > 0) {
678 if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
679 return -EIO;
680 memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
681 count -= len;
682 if (count == 0)
683 return count1;
684 sector_num++;
685 buf += len;
688 /* read the sectors "in place" */
689 nb_sectors = count >> SECTOR_BITS;
690 if (nb_sectors > 0) {
691 if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
692 return -EIO;
693 sector_num += nb_sectors;
694 len = nb_sectors << SECTOR_BITS;
695 buf += len;
696 count -= len;
699 /* add data from the last sector */
700 if (count > 0) {
701 if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
702 return -EIO;
703 memcpy(buf, tmp_buf, count);
705 return count1;
708 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
709 const void *buf, int count1)
711 uint8_t tmp_buf[SECTOR_SIZE];
712 int len, nb_sectors, count;
713 int64_t sector_num;
715 count = count1;
716 /* first write to align to sector start */
717 len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
718 if (len > count)
719 len = count;
720 sector_num = offset >> SECTOR_BITS;
721 if (len > 0) {
722 if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
723 return -EIO;
724 memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
725 if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
726 return -EIO;
727 count -= len;
728 if (count == 0)
729 return count1;
730 sector_num++;
731 buf += len;
734 /* write the sectors "in place" */
735 nb_sectors = count >> SECTOR_BITS;
736 if (nb_sectors > 0) {
737 if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
738 return -EIO;
739 sector_num += nb_sectors;
740 len = nb_sectors << SECTOR_BITS;
741 buf += len;
742 count -= len;
745 /* add data from the last sector */
746 if (count > 0) {
747 if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
748 return -EIO;
749 memcpy(tmp_buf, buf, count);
750 if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
751 return -EIO;
753 return count1;
757 * Truncate file to 'offset' bytes (needed only for file protocols)
759 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
761 BlockDriver *drv = bs->drv;
762 if (!drv)
763 return -ENOMEDIUM;
764 if (!drv->bdrv_truncate)
765 return -ENOTSUP;
766 if (bs->read_only)
767 return -EACCES;
768 return drv->bdrv_truncate(bs, offset);
772 * Length of a file in bytes. Return < 0 if error or unknown.
774 int64_t bdrv_getlength(BlockDriverState *bs)
776 BlockDriver *drv = bs->drv;
777 if (!drv)
778 return -ENOMEDIUM;
779 if (!drv->bdrv_getlength) {
780 /* legacy mode */
781 return bs->total_sectors * SECTOR_SIZE;
783 return drv->bdrv_getlength(bs);
786 /* return 0 as number of sectors if no device present or error */
787 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
789 int64_t length;
790 length = bdrv_getlength(bs);
791 if (length < 0)
792 length = 0;
793 else
794 length = length >> SECTOR_BITS;
795 *nb_sectors_ptr = length;
/*
 * One 16-byte MSDOS partition table entry, as read by guess_disk_lchs()
 * from offset 0x1be of the first sector.
 */
struct partition {
    uint8_t  boot_ind;      /* 0x80 - active */
    uint8_t  head;          /* starting head */
    uint8_t  sector;        /* starting sector */
    uint8_t  cyl;           /* starting cylinder */
    uint8_t  sys_ind;       /* What partition type */
    uint8_t  end_head;      /* end head */
    uint8_t  end_sector;    /* end sector */
    uint8_t  end_cyl;       /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
811 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
812 static int guess_disk_lchs(BlockDriverState *bs,
813 int *pcylinders, int *pheads, int *psectors)
815 uint8_t buf[512];
816 int ret, i, heads, sectors, cylinders;
817 struct partition *p;
818 uint32_t nr_sects;
819 uint64_t nb_sectors;
821 bdrv_get_geometry(bs, &nb_sectors);
823 ret = bdrv_read(bs, 0, buf, 1);
824 if (ret < 0)
825 return -1;
826 /* test msdos magic */
827 if (buf[510] != 0x55 || buf[511] != 0xaa)
828 return -1;
829 for(i = 0; i < 4; i++) {
830 p = ((struct partition *)(buf + 0x1be)) + i;
831 nr_sects = le32_to_cpu(p->nr_sects);
832 if (nr_sects && p->end_head) {
833 /* We make the assumption that the partition terminates on
834 a cylinder boundary */
835 heads = p->end_head + 1;
836 sectors = p->end_sector & 63;
837 if (sectors == 0)
838 continue;
839 cylinders = nb_sectors / (heads * sectors);
840 if (cylinders < 1 || cylinders > 16383)
841 continue;
842 *pheads = heads;
843 *psectors = sectors;
844 *pcylinders = cylinders;
845 #if 0
846 printf("guessed geometry: LCHS=%d %d %d\n",
847 cylinders, heads, sectors);
848 #endif
849 return 0;
852 return -1;
855 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
857 int translation, lba_detected = 0;
858 int cylinders, heads, secs;
859 uint64_t nb_sectors;
861 /* if a geometry hint is available, use it */
862 bdrv_get_geometry(bs, &nb_sectors);
863 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
864 translation = bdrv_get_translation_hint(bs);
865 if (cylinders != 0) {
866 *pcyls = cylinders;
867 *pheads = heads;
868 *psecs = secs;
869 } else {
870 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
871 if (heads > 16) {
872 /* if heads > 16, it means that a BIOS LBA
873 translation was active, so the default
874 hardware geometry is OK */
875 lba_detected = 1;
876 goto default_geometry;
877 } else {
878 *pcyls = cylinders;
879 *pheads = heads;
880 *psecs = secs;
881 /* disable any translation to be in sync with
882 the logical geometry */
883 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
884 bdrv_set_translation_hint(bs,
885 BIOS_ATA_TRANSLATION_NONE);
888 } else {
889 default_geometry:
890 /* if no geometry, use a standard physical disk geometry */
891 cylinders = nb_sectors / (16 * 63);
893 if (cylinders > 16383)
894 cylinders = 16383;
895 else if (cylinders < 2)
896 cylinders = 2;
897 *pcyls = cylinders;
898 *pheads = 16;
899 *psecs = 63;
900 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
901 if ((*pcyls * *pheads) <= 131072) {
902 bdrv_set_translation_hint(bs,
903 BIOS_ATA_TRANSLATION_LARGE);
904 } else {
905 bdrv_set_translation_hint(bs,
906 BIOS_ATA_TRANSLATION_LBA);
910 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
914 void bdrv_set_geometry_hint(BlockDriverState *bs,
915 int cyls, int heads, int secs)
917 bs->cyls = cyls;
918 bs->heads = heads;
919 bs->secs = secs;
922 void bdrv_set_type_hint(BlockDriverState *bs, int type)
924 bs->type = type;
925 bs->removable = ((type == BDRV_TYPE_CDROM ||
926 type == BDRV_TYPE_FLOPPY));
929 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
931 bs->translation = translation;
934 void bdrv_get_geometry_hint(BlockDriverState *bs,
935 int *pcyls, int *pheads, int *psecs)
937 *pcyls = bs->cyls;
938 *pheads = bs->heads;
939 *psecs = bs->secs;
942 int bdrv_get_type_hint(BlockDriverState *bs)
944 return bs->type;
947 int bdrv_get_translation_hint(BlockDriverState *bs)
949 return bs->translation;
952 int bdrv_is_removable(BlockDriverState *bs)
954 return bs->removable;
957 int bdrv_is_read_only(BlockDriverState *bs)
959 return bs->read_only;
962 int bdrv_set_read_only(BlockDriverState *bs, int read_only)
964 int ret = bs->read_only;
965 bs->read_only = read_only;
966 return ret;
969 int bdrv_is_sg(BlockDriverState *bs)
971 return bs->sg;
974 int bdrv_enable_write_cache(BlockDriverState *bs)
976 return bs->enable_write_cache;
979 /* XXX: no longer used */
980 void bdrv_set_change_cb(BlockDriverState *bs,
981 void (*change_cb)(void *opaque), void *opaque)
983 bs->change_cb = change_cb;
984 bs->change_opaque = opaque;
987 int bdrv_is_encrypted(BlockDriverState *bs)
989 if (bs->backing_hd && bs->backing_hd->encrypted)
990 return 1;
991 return bs->encrypted;
994 int bdrv_key_required(BlockDriverState *bs)
996 BlockDriverState *backing_hd = bs->backing_hd;
998 if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
999 return 1;
1000 return (bs->encrypted && !bs->valid_key);
1003 int bdrv_set_key(BlockDriverState *bs, const char *key)
1005 int ret;
1006 if (bs->backing_hd && bs->backing_hd->encrypted) {
1007 ret = bdrv_set_key(bs->backing_hd, key);
1008 if (ret < 0)
1009 return ret;
1010 if (!bs->encrypted)
1011 return 0;
1013 if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
1014 return -1;
1015 ret = bs->drv->bdrv_set_key(bs, key);
1016 if (ret < 0) {
1017 bs->valid_key = 0;
1018 } else if (!bs->valid_key) {
1019 bs->valid_key = 1;
1020 /* call the change callback now, we skipped it on open */
1021 bs->media_changed = 1;
1022 if (bs->change_cb)
1023 bs->change_cb(bs->change_opaque);
1025 return ret;
1028 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
1030 if (!bs->drv) {
1031 buf[0] = '\0';
1032 } else {
1033 pstrcpy(buf, buf_size, bs->drv->format_name);
1037 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
1038 void *opaque)
1040 BlockDriver *drv;
1042 for (drv = first_drv; drv != NULL; drv = drv->next) {
1043 it(opaque, drv->format_name);
1047 BlockDriverState *bdrv_find(const char *name)
1049 BlockDriverState *bs;
1051 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1052 if (!strcmp(name, bs->device_name))
1053 return bs;
1055 return NULL;
1058 void bdrv_iterate(void (*it)(void *opaque, BlockDriverState *bs), void *opaque)
1060 BlockDriverState *bs;
1062 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1063 it(opaque, bs);
1067 const char *bdrv_get_device_name(BlockDriverState *bs)
1069 return bs->device_name;
1072 void bdrv_flush(BlockDriverState *bs)
1074 if (!bs->drv)
1075 return;
1076 if (bs->drv->bdrv_flush)
1077 bs->drv->bdrv_flush(bs);
1078 if (bs->backing_hd)
1079 bdrv_flush(bs->backing_hd);
1082 void bdrv_flush_all(void)
1084 BlockDriverState *bs;
1086 for (bs = bdrv_first; bs != NULL; bs = bs->next)
1087 if (bs->drv && !bdrv_is_read_only(bs) &&
1088 (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1089 bdrv_flush(bs);
1093 * Returns true iff the specified sector is present in the disk image. Drivers
1094 * not implementing the functionality are assumed to not support backing files,
1095 * hence all their sectors are reported as allocated.
1097 * 'pnum' is set to the number of sectors (including and immediately following
1098 * the specified sector) that are known to be in the same
1099 * allocated/unallocated state.
1101 * 'nb_sectors' is the max value 'pnum' should be set to.
1103 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
1104 int *pnum)
1106 int64_t n;
1107 if (!bs->drv->bdrv_is_allocated) {
1108 if (sector_num >= bs->total_sectors) {
1109 *pnum = 0;
1110 return 0;
1112 n = bs->total_sectors - sector_num;
1113 *pnum = (n < nb_sectors) ? (n) : (nb_sectors);
1114 return 1;
1116 return bs->drv->bdrv_is_allocated(bs, sector_num, nb_sectors, pnum);
1119 void bdrv_info(Monitor *mon)
1121 BlockDriverState *bs;
1123 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1124 monitor_printf(mon, "%s:", bs->device_name);
1125 monitor_printf(mon, " type=");
1126 switch(bs->type) {
1127 case BDRV_TYPE_HD:
1128 monitor_printf(mon, "hd");
1129 break;
1130 case BDRV_TYPE_CDROM:
1131 monitor_printf(mon, "cdrom");
1132 break;
1133 case BDRV_TYPE_FLOPPY:
1134 monitor_printf(mon, "floppy");
1135 break;
1137 monitor_printf(mon, " removable=%d", bs->removable);
1138 if (bs->removable) {
1139 monitor_printf(mon, " locked=%d", bs->locked);
1141 if (bs->drv) {
1142 monitor_printf(mon, " file=");
1143 monitor_print_filename(mon, bs->filename);
1144 if (bs->backing_file[0] != '\0') {
1145 monitor_printf(mon, " backing_file=");
1146 monitor_print_filename(mon, bs->backing_file);
1148 monitor_printf(mon, " ro=%d", bs->read_only);
1149 monitor_printf(mon, " drv=%s", bs->drv->format_name);
1150 monitor_printf(mon, " encrypted=%d", bdrv_is_encrypted(bs));
1151 } else {
1152 monitor_printf(mon, " [not inserted]");
1154 monitor_printf(mon, "\n");
1158 /* The "info blockstats" command. */
1159 void bdrv_info_stats(Monitor *mon)
1161 BlockDriverState *bs;
1163 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1164 monitor_printf(mon, "%s:"
1165 " rd_bytes=%" PRIu64
1166 " wr_bytes=%" PRIu64
1167 " rd_operations=%" PRIu64
1168 " wr_operations=%" PRIu64
1169 "\n",
1170 bs->device_name,
1171 bs->rd_bytes, bs->wr_bytes,
1172 bs->rd_ops, bs->wr_ops);
1176 const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
1178 if (bs->backing_hd && bs->backing_hd->encrypted)
1179 return bs->backing_file;
1180 else if (bs->encrypted)
1181 return bs->filename;
1182 else
1183 return NULL;
1186 void bdrv_get_backing_filename(BlockDriverState *bs,
1187 char *filename, int filename_size)
1189 if (!bs->backing_hd) {
1190 pstrcpy(filename, filename_size, "");
1191 } else {
1192 pstrcpy(filename, filename_size, bs->backing_file);
1196 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1197 const uint8_t *buf, int nb_sectors)
1199 BlockDriver *drv = bs->drv;
1200 if (!drv)
1201 return -ENOMEDIUM;
1202 if (!drv->bdrv_write_compressed)
1203 return -ENOTSUP;
1204 if (bdrv_check_request(bs, sector_num, nb_sectors))
1205 return -EIO;
1206 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1209 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1211 BlockDriver *drv = bs->drv;
1212 if (!drv)
1213 return -ENOMEDIUM;
1214 if (!drv->bdrv_get_info)
1215 return -ENOTSUP;
1216 memset(bdi, 0, sizeof(*bdi));
1217 return drv->bdrv_get_info(bs, bdi);
1220 int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
1221 int64_t pos, int size)
1223 BlockDriver *drv = bs->drv;
1224 if (!drv)
1225 return -ENOMEDIUM;
1226 if (!drv->bdrv_save_vmstate)
1227 return -ENOTSUP;
1228 return drv->bdrv_save_vmstate(bs, buf, pos, size);
1231 int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
1232 int64_t pos, int size)
1234 BlockDriver *drv = bs->drv;
1235 if (!drv)
1236 return -ENOMEDIUM;
1237 if (!drv->bdrv_load_vmstate)
1238 return -ENOTSUP;
1239 return drv->bdrv_load_vmstate(bs, buf, pos, size);
1242 /**************************************************************/
1243 /* handling of snapshots */
1245 int bdrv_snapshot_create(BlockDriverState *bs,
1246 QEMUSnapshotInfo *sn_info)
1248 BlockDriver *drv = bs->drv;
1249 if (!drv)
1250 return -ENOMEDIUM;
1251 if (!drv->bdrv_snapshot_create)
1252 return -ENOTSUP;
1253 return drv->bdrv_snapshot_create(bs, sn_info);
1256 int bdrv_snapshot_goto(BlockDriverState *bs,
1257 const char *snapshot_id)
1259 BlockDriver *drv = bs->drv;
1260 if (!drv)
1261 return -ENOMEDIUM;
1262 if (!drv->bdrv_snapshot_goto)
1263 return -ENOTSUP;
1264 return drv->bdrv_snapshot_goto(bs, snapshot_id);
1267 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1269 BlockDriver *drv = bs->drv;
1270 if (!drv)
1271 return -ENOMEDIUM;
1272 if (!drv->bdrv_snapshot_delete)
1273 return -ENOTSUP;
1274 return drv->bdrv_snapshot_delete(bs, snapshot_id);
1277 int bdrv_snapshot_list(BlockDriverState *bs,
1278 QEMUSnapshotInfo **psn_info)
1280 BlockDriver *drv = bs->drv;
1281 if (!drv)
1282 return -ENOMEDIUM;
1283 if (!drv->bdrv_snapshot_list)
1284 return -ENOTSUP;
1285 return drv->bdrv_snapshot_list(bs, psn_info);
#define NB_SUFFIXES 4

/*
 * Format 'size' into 'buf' (capacity 'buf_size') and return 'buf'.
 *
 * Values up to 999 are printed as plain integers. Larger values use
 * 1024-based units K, M, G, T: one decimal below ten units ("1.5K"), a
 * rounded integer otherwise ("10K"). T is also used for anything too large
 * for the other rules.
 */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            break;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            break;
        }
    }
    return buf;
}
1318 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1320 char buf1[128], date_buf[128], clock_buf[128];
1321 #ifdef _WIN32
1322 struct tm *ptm;
1323 #else
1324 struct tm tm;
1325 #endif
1326 time_t ti;
1327 int64_t secs;
1329 if (!sn) {
1330 snprintf(buf, buf_size,
1331 "%-10s%-20s%7s%20s%15s",
1332 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1333 } else {
1334 ti = sn->date_sec;
1335 #ifdef _WIN32
1336 ptm = localtime(&ti);
1337 strftime(date_buf, sizeof(date_buf),
1338 "%Y-%m-%d %H:%M:%S", ptm);
1339 #else
1340 localtime_r(&ti, &tm);
1341 strftime(date_buf, sizeof(date_buf),
1342 "%Y-%m-%d %H:%M:%S", &tm);
1343 #endif
1344 secs = sn->vm_clock_nsec / 1000000000;
1345 snprintf(clock_buf, sizeof(clock_buf),
1346 "%02d:%02d:%02d.%03d",
1347 (int)(secs / 3600),
1348 (int)((secs / 60) % 60),
1349 (int)(secs % 60),
1350 (int)((sn->vm_clock_nsec / 1000000) % 1000));
1351 snprintf(buf, buf_size,
1352 "%-10s%-20s%7s%20s%15s",
1353 sn->id_str, sn->name,
1354 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1355 date_buf,
1356 clock_buf);
1358 return buf;
1362 /**************************************************************/
1363 /* async I/Os */
1365 BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
1366 QEMUIOVector *qiov, int nb_sectors,
1367 BlockDriverCompletionFunc *cb, void *opaque)
1369 BlockDriver *drv = bs->drv;
1370 BlockDriverAIOCB *ret;
1372 if (!drv)
1373 return NULL;
1374 if (bdrv_check_request(bs, sector_num, nb_sectors))
1375 return NULL;
1377 ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
1378 cb, opaque);
1380 if (ret) {
1381 /* Update stats even though technically transfer has not happened. */
1382 bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1383 bs->rd_ops ++;
1386 return ret;
1389 BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
1390 QEMUIOVector *qiov, int nb_sectors,
1391 BlockDriverCompletionFunc *cb, void *opaque)
1393 BlockDriver *drv = bs->drv;
1394 BlockDriverAIOCB *ret;
1396 if (!drv)
1397 return NULL;
1398 if (bs->read_only)
1399 return NULL;
1400 if (bdrv_check_request(bs, sector_num, nb_sectors))
1401 return NULL;
1403 ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
1404 cb, opaque);
1406 if (ret) {
1407 /* Update stats even though technically transfer has not happened. */
1408 bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1409 bs->wr_ops ++;
1412 return ret;
1416 typedef struct MultiwriteCB {
1417 int error;
1418 int num_requests;
1419 int num_callbacks;
1420 struct {
1421 BlockDriverCompletionFunc *cb;
1422 void *opaque;
1423 QEMUIOVector *free_qiov;
1424 void *free_buf;
1425 } callbacks[];
1426 } MultiwriteCB;
1428 static void multiwrite_user_cb(MultiwriteCB *mcb)
1430 int i;
1432 for (i = 0; i < mcb->num_callbacks; i++) {
1433 mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
1434 qemu_free(mcb->callbacks[i].free_qiov);
1435 qemu_free(mcb->callbacks[i].free_buf);
1439 static void multiwrite_cb(void *opaque, int ret)
1441 MultiwriteCB *mcb = opaque;
1443 if (ret < 0) {
1444 mcb->error = ret;
1445 multiwrite_user_cb(mcb);
1448 mcb->num_requests--;
1449 if (mcb->num_requests == 0) {
1450 if (mcb->error == 0) {
1451 multiwrite_user_cb(mcb);
1453 qemu_free(mcb);
1457 static int multiwrite_req_compare(const void *a, const void *b)
1459 return (((BlockRequest*) a)->sector - ((BlockRequest*) b)->sector);
1463 * Takes a bunch of requests and tries to merge them. Returns the number of
1464 * requests that remain after merging.
1466 static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
1467 int num_reqs, MultiwriteCB *mcb)
1469 int i, outidx;
1471 // Sort requests by start sector
1472 qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
1474 // Check if adjacent requests touch the same clusters. If so, combine them,
1475 // filling up gaps with zero sectors.
1476 outidx = 0;
1477 for (i = 1; i < num_reqs; i++) {
1478 int merge = 0;
1479 int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
1481 // This handles the cases that are valid for all block drivers, namely
1482 // exactly sequential writes and overlapping writes.
1483 if (reqs[i].sector <= oldreq_last) {
1484 merge = 1;
1487 // The block driver may decide that it makes sense to combine requests
1488 // even if there is a gap of some sectors between them. In this case,
1489 // the gap is filled with zeros (therefore only applicable for yet
1490 // unused space in format like qcow2).
1491 if (!merge && bs->drv->bdrv_merge_requests) {
1492 merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
1495 if (merge) {
1496 size_t size;
1497 QEMUIOVector *qiov = qemu_mallocz(sizeof(*qiov));
1498 qemu_iovec_init(qiov,
1499 reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
1501 // Add the first request to the merged one. If the requests are
1502 // overlapping, drop the last sectors of the first request.
1503 size = (reqs[i].sector - reqs[outidx].sector) << 9;
1504 qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
1506 // We might need to add some zeros between the two requests
1507 if (reqs[i].sector > oldreq_last) {
1508 size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
1509 uint8_t *buf = qemu_blockalign(bs, zero_bytes);
1510 memset(buf, 0, zero_bytes);
1511 qemu_iovec_add(qiov, buf, zero_bytes);
1512 mcb->callbacks[i].free_buf = buf;
1515 // Add the second request
1516 qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
1518 reqs[outidx].nb_sectors += reqs[i].nb_sectors;
1519 reqs[outidx].qiov = qiov;
1521 mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
1522 } else {
1523 outidx++;
1524 reqs[outidx].sector = reqs[i].sector;
1525 reqs[outidx].nb_sectors = reqs[i].nb_sectors;
1526 reqs[outidx].qiov = reqs[i].qiov;
1530 return outidx + 1;
1534 * Submit multiple AIO write requests at once.
1536 * On success, the function returns 0 and all requests in the reqs array have
1537 * been submitted. In error case this function returns -1, and any of the
1538 * requests may or may not be submitted yet. In particular, this means that the
1539 * callback will be called for some of the requests, for others it won't. The
1540 * caller must check the error field of the BlockRequest to wait for the right
1541 * callbacks (if error != 0, no callback will be called).
1543 * The implementation may modify the contents of the reqs array, e.g. to merge
1544 * requests. However, the fields opaque and error are left unmodified as they
1545 * are used to signal failure for a single request to the caller.
1547 int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
1549 BlockDriverAIOCB *acb;
1550 MultiwriteCB *mcb;
1551 int i;
1553 if (num_reqs == 0) {
1554 return 0;
1557 // Create MultiwriteCB structure
1558 mcb = qemu_mallocz(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
1559 mcb->num_requests = 0;
1560 mcb->num_callbacks = num_reqs;
1562 for (i = 0; i < num_reqs; i++) {
1563 mcb->callbacks[i].cb = reqs[i].cb;
1564 mcb->callbacks[i].opaque = reqs[i].opaque;
1567 // Check for mergable requests
1568 num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
1570 // Run the aio requests
1571 for (i = 0; i < num_reqs; i++) {
1572 acb = bdrv_aio_writev(bs, reqs[i].sector, reqs[i].qiov,
1573 reqs[i].nb_sectors, multiwrite_cb, mcb);
1575 if (acb == NULL) {
1576 // We can only fail the whole thing if no request has been
1577 // submitted yet. Otherwise we'll wait for the submitted AIOs to
1578 // complete and report the error in the callback.
1579 if (mcb->num_requests == 0) {
1580 reqs[i].error = EIO;
1581 goto fail;
1582 } else {
1583 mcb->error = EIO;
1584 break;
1586 } else {
1587 mcb->num_requests++;
1591 return 0;
1593 fail:
1594 free(mcb);
1595 return -1;
1598 BlockDriverAIOCB *bdrv_aio_flush(BlockDriverState *bs,
1599 BlockDriverCompletionFunc *cb, void *opaque)
1601 BlockDriver *drv = bs->drv;
1603 if (!drv)
1604 return NULL;
1607 * Note that unlike bdrv_flush the driver is reponsible for flushing a
1608 * backing image if it exists.
1610 return drv->bdrv_aio_flush(bs, cb, opaque);
1613 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1615 acb->pool->cancel(acb);
1619 /**************************************************************/
1620 /* async block device emulation */
1622 typedef struct BlockDriverAIOCBSync {
1623 BlockDriverAIOCB common;
1624 QEMUBH *bh;
1625 int ret;
1626 /* vector translation state */
1627 QEMUIOVector *qiov;
1628 uint8_t *bounce;
1629 int is_write;
1630 } BlockDriverAIOCBSync;
1632 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1634 BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1635 qemu_bh_delete(acb->bh);
1636 acb->bh = NULL;
1637 qemu_aio_release(acb);
1640 static AIOPool bdrv_em_aio_pool = {
1641 .aiocb_size = sizeof(BlockDriverAIOCBSync),
1642 .cancel = bdrv_aio_cancel_em,
1645 static void bdrv_aio_bh_cb(void *opaque)
1647 BlockDriverAIOCBSync *acb = opaque;
1649 if (!acb->is_write)
1650 qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
1651 qemu_vfree(acb->bounce);
1652 acb->common.cb(acb->common.opaque, acb->ret);
1653 qemu_bh_delete(acb->bh);
1654 acb->bh = NULL;
1655 qemu_aio_release(acb);
1658 static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
1659 int64_t sector_num,
1660 QEMUIOVector *qiov,
1661 int nb_sectors,
1662 BlockDriverCompletionFunc *cb,
1663 void *opaque,
1664 int is_write)
1667 BlockDriverAIOCBSync *acb;
1669 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
1670 acb->is_write = is_write;
1671 acb->qiov = qiov;
1672 acb->bounce = qemu_blockalign(bs, qiov->size);
1674 if (!acb->bh)
1675 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1677 if (is_write) {
1678 qemu_iovec_to_buffer(acb->qiov, acb->bounce);
1679 acb->ret = bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
1680 } else {
1681 acb->ret = bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
1684 qemu_bh_schedule(acb->bh);
1686 return &acb->common;
1689 static BlockDriverAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
1690 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1691 BlockDriverCompletionFunc *cb, void *opaque)
1693 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
1696 static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
1697 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
1698 BlockDriverCompletionFunc *cb, void *opaque)
1700 return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
1703 static BlockDriverAIOCB *bdrv_aio_flush_em(BlockDriverState *bs,
1704 BlockDriverCompletionFunc *cb, void *opaque)
1706 BlockDriverAIOCBSync *acb;
1708 acb = qemu_aio_get(&bdrv_em_aio_pool, bs, cb, opaque);
1709 acb->is_write = 1; /* don't bounce in the completion hadler */
1710 acb->qiov = NULL;
1711 acb->bounce = NULL;
1712 acb->ret = 0;
1714 if (!acb->bh)
1715 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1717 bdrv_flush(bs);
1718 qemu_bh_schedule(acb->bh);
1719 return &acb->common;
1722 /**************************************************************/
1723 /* sync block device emulation */
/* Completion callback storing ret into the int that opaque points to. */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1730 #define NOT_DONE 0x7fffffff
1732 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1733 uint8_t *buf, int nb_sectors)
1735 int async_ret;
1736 BlockDriverAIOCB *acb;
1737 struct iovec iov;
1738 QEMUIOVector qiov;
1740 async_context_push();
1742 async_ret = NOT_DONE;
1743 iov.iov_base = (void *)buf;
1744 iov.iov_len = nb_sectors * 512;
1745 qemu_iovec_init_external(&qiov, &iov, 1);
1746 acb = bdrv_aio_readv(bs, sector_num, &qiov, nb_sectors,
1747 bdrv_rw_em_cb, &async_ret);
1748 if (acb == NULL) {
1749 async_ret = -1;
1750 goto fail;
1753 while (async_ret == NOT_DONE) {
1754 qemu_aio_wait();
1758 fail:
1759 async_context_pop();
1760 return async_ret;
1763 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1764 const uint8_t *buf, int nb_sectors)
1766 int async_ret;
1767 BlockDriverAIOCB *acb;
1768 struct iovec iov;
1769 QEMUIOVector qiov;
1771 async_context_push();
1773 async_ret = NOT_DONE;
1774 iov.iov_base = (void *)buf;
1775 iov.iov_len = nb_sectors * 512;
1776 qemu_iovec_init_external(&qiov, &iov, 1);
1777 acb = bdrv_aio_writev(bs, sector_num, &qiov, nb_sectors,
1778 bdrv_rw_em_cb, &async_ret);
1779 if (acb == NULL) {
1780 async_ret = -1;
1781 goto fail;
1783 while (async_ret == NOT_DONE) {
1784 qemu_aio_wait();
1787 fail:
1788 async_context_pop();
1789 return async_ret;
1792 void bdrv_init(void)
1794 module_call_init(MODULE_INIT_BLOCK);
1797 void bdrv_init_with_whitelist(void)
1799 use_bdrv_whitelist = 1;
1800 bdrv_init();
1803 void *qemu_aio_get(AIOPool *pool, BlockDriverState *bs,
1804 BlockDriverCompletionFunc *cb, void *opaque)
1806 BlockDriverAIOCB *acb;
1808 if (pool->free_aiocb) {
1809 acb = pool->free_aiocb;
1810 pool->free_aiocb = acb->next;
1811 } else {
1812 acb = qemu_mallocz(pool->aiocb_size);
1813 acb->pool = pool;
1815 acb->bs = bs;
1816 acb->cb = cb;
1817 acb->opaque = opaque;
1818 return acb;
1821 void qemu_aio_release(void *p)
1823 BlockDriverAIOCB *acb = (BlockDriverAIOCB *)p;
1824 AIOPool *pool = acb->pool;
1825 acb->next = pool->free_aiocb;
1826 pool->free_aiocb = acb;
1829 /**************************************************************/
1830 /* removable device support */
1833 * Return TRUE if the media is present
1835 int bdrv_is_inserted(BlockDriverState *bs)
1837 BlockDriver *drv = bs->drv;
1838 int ret;
1839 if (!drv)
1840 return 0;
1841 if (!drv->bdrv_is_inserted)
1842 return 1;
1843 ret = drv->bdrv_is_inserted(bs);
1844 return ret;
1848 * Return TRUE if the media changed since the last call to this
1849 * function. It is currently only used for floppy disks
1851 int bdrv_media_changed(BlockDriverState *bs)
1853 BlockDriver *drv = bs->drv;
1854 int ret;
1856 if (!drv || !drv->bdrv_media_changed)
1857 ret = -ENOTSUP;
1858 else
1859 ret = drv->bdrv_media_changed(bs);
1860 if (ret == -ENOTSUP)
1861 ret = bs->media_changed;
1862 bs->media_changed = 0;
1863 return ret;
1867 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1869 int bdrv_eject(BlockDriverState *bs, int eject_flag)
1871 BlockDriver *drv = bs->drv;
1872 int ret;
1874 if (bs->locked) {
1875 return -EBUSY;
1878 if (!drv || !drv->bdrv_eject) {
1879 ret = -ENOTSUP;
1880 } else {
1881 ret = drv->bdrv_eject(bs, eject_flag);
1883 if (ret == -ENOTSUP) {
1884 if (eject_flag)
1885 bdrv_close(bs);
1886 ret = 0;
1889 return ret;
1892 int bdrv_is_locked(BlockDriverState *bs)
1894 return bs->locked;
1898 * Lock or unlock the media (if it is locked, the user won't be able
1899 * to eject it manually).
1901 void bdrv_set_locked(BlockDriverState *bs, int locked)
1903 BlockDriver *drv = bs->drv;
1905 bs->locked = locked;
1906 if (drv && drv->bdrv_set_locked) {
1907 drv->bdrv_set_locked(bs, locked);
1911 /* needed for generic scsi interface */
1913 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1915 BlockDriver *drv = bs->drv;
1917 if (drv && drv->bdrv_ioctl)
1918 return drv->bdrv_ioctl(bs, req, buf);
1919 return -ENOTSUP;
1922 BlockDriverAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
1923 unsigned long int req, void *buf,
1924 BlockDriverCompletionFunc *cb, void *opaque)
1926 BlockDriver *drv = bs->drv;
1928 if (drv && drv->bdrv_aio_ioctl)
1929 return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
1930 return NULL;
1933 void *qemu_blockalign(BlockDriverState *bs, size_t size)
1935 return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment : 512, size);