Virtio-blk async IO
[qemu-kvm/fedora.git] / block.c
blob0e9577c1646cbb586c873bab84f03ebec4fe8177
1 /*
2 * QEMU System Emulator block driver
4 * Copyright (c) 2003 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "qemu-common.h"
25 #ifndef QEMU_IMG
26 #include "console.h"
27 #endif
28 #include "block_int.h"
29 #include "osdep.h"
31 #ifdef _BSD
32 #include <sys/types.h>
33 #include <sys/stat.h>
34 #include <sys/ioctl.h>
35 #include <sys/queue.h>
36 #include <sys/disk.h>
37 #endif
39 #define SECTOR_BITS 9
40 #define SECTOR_SIZE (1 << SECTOR_BITS)
42 typedef struct BlockDriverAIOCBSync {
43 BlockDriverAIOCB common;
44 QEMUBH *bh;
45 int ret;
46 } BlockDriverAIOCBSync;
48 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
49 int64_t sector_num, uint8_t *buf, int nb_sectors,
50 BlockDriverCompletionFunc *cb, void *opaque);
51 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
52 int64_t sector_num, const uint8_t *buf, int nb_sectors,
53 BlockDriverCompletionFunc *cb, void *opaque);
54 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb);
55 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
56 uint8_t *buf, int nb_sectors);
57 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
58 const uint8_t *buf, int nb_sectors);
60 BlockDriverState *bdrv_first;
61 static BlockDriver *first_drv;
/* Return non-zero if 'path' is absolute.  Everything up to and including
   the first ':' is skipped before the test, so "proto:/dir" is absolute
   while "proto:dir" is not.  On Windows a backslash is accepted as a
   separator too. */
int path_is_absolute(const char *path)
{
    const char *colon;
    const char *start;

#ifdef _WIN32
    /* specific case for names like: "\\.\d:" */
    if (path[0] == '/' || path[0] == '\\') {
        return 1;
    }
#endif
    colon = strchr(path, ':');
    start = colon ? colon + 1 : path;
#ifdef _WIN32
    if (*start == '\\') {
        return 1;
    }
#endif
    return *start == '/';
}
/* Build in 'dest' (at most dest_size bytes, NUL included) the path of
   'filename' as seen from 'base_path'.  An absolute filename is copied
   unchanged; otherwise the directory part of base_path (everything up to
   the last separator, or up to the "proto:" prefix if that is longer) is
   prepended.  URL are supported. */
void path_combine(char *dest, int dest_size,
                  const char *base_path,
                  const char *filename)
{
    const char *after_proto, *after_dir;
    int prefix_len;

    if (dest_size <= 0) {
        return;
    }
    if (path_is_absolute(filename)) {
        pstrcpy(dest, dest_size, filename);
        return;
    }

    /* position just after an optional "proto:" prefix */
    after_proto = strchr(base_path, ':');
    after_proto = after_proto ? after_proto + 1 : base_path;

    /* position just after the last directory separator */
    after_dir = strrchr(base_path, '/');
#ifdef _WIN32
    {
        const char *bslash = strrchr(base_path, '\\');
        if (!after_dir || bslash > after_dir) {
            after_dir = bslash;
        }
    }
#endif
    after_dir = after_dir ? after_dir + 1 : base_path;

    /* keep whichever prefix reaches further into base_path */
    if (after_dir > after_proto) {
        after_proto = after_dir;
    }
    prefix_len = after_proto - base_path;
    if (prefix_len > dest_size - 1) {
        prefix_len = dest_size - 1;
    }
    memcpy(dest, base_path, prefix_len);
    dest[prefix_len] = '\0';
    pstrcat(dest, dest_size, filename);
}
128 static void bdrv_register(BlockDriver *bdrv)
130 if (!bdrv->bdrv_aio_read) {
131 /* add AIO emulation layer */
132 bdrv->bdrv_aio_read = bdrv_aio_read_em;
133 bdrv->bdrv_aio_write = bdrv_aio_write_em;
134 bdrv->bdrv_aio_cancel = bdrv_aio_cancel_em;
135 bdrv->aiocb_size = sizeof(BlockDriverAIOCBSync);
136 } else if (!bdrv->bdrv_read && !bdrv->bdrv_pread) {
137 /* add synchronous IO emulation layer */
138 bdrv->bdrv_read = bdrv_read_em;
139 bdrv->bdrv_write = bdrv_write_em;
141 bdrv->next = first_drv;
142 first_drv = bdrv;
145 /* create a new block device (by default it is empty) */
146 BlockDriverState *bdrv_new(const char *device_name)
148 BlockDriverState **pbs, *bs;
150 bs = qemu_mallocz(sizeof(BlockDriverState));
151 if(!bs)
152 return NULL;
153 pstrcpy(bs->device_name, sizeof(bs->device_name), device_name);
154 if (device_name[0] != '\0') {
155 /* insert at the end */
156 pbs = &bdrv_first;
157 while (*pbs != NULL)
158 pbs = &(*pbs)->next;
159 *pbs = bs;
161 return bs;
164 BlockDriver *bdrv_find_format(const char *format_name)
166 BlockDriver *drv1;
167 for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
168 if (!strcmp(drv1->format_name, format_name))
169 return drv1;
171 return NULL;
174 int bdrv_create(BlockDriver *drv,
175 const char *filename, int64_t size_in_sectors,
176 const char *backing_file, int flags)
178 if (!drv->bdrv_create)
179 return -ENOTSUP;
180 return drv->bdrv_create(filename, size_in_sectors, backing_file, flags);
/* Store the name of a freshly created temporary file in 'filename'.
   On Windows the name comes from GetTempFileName() and 'size' is not
   consulted; elsewhere the file is created with mkstemp() under $TMPDIR
   (or /tmp) and closed again right away. */
void get_tmp_filename(char *filename, int size)
{
#ifdef _WIN32
    char temp_dir[MAX_PATH];

    GetTempPath(MAX_PATH, temp_dir);
    GetTempFileName(temp_dir, "qem", 0, filename);
#else
    const char *dir = getenv("TMPDIR");
    int fd;

    /* XXX: race condition possible */
    if (!dir) {
        dir = "/tmp";
    }
    snprintf(filename, size, "%s/vl.XXXXXX", dir);
    fd = mkstemp(filename);
    close(fd);
#endif
}
206 #ifdef _WIN32
/* Return 1 if 'filename' starts with an ASCII letter followed by ':'. */
static int is_windows_drive_prefix(const char *filename)
{
    const char letter = filename[0];
    const int is_letter = (letter >= 'a' && letter <= 'z') ||
                          (letter >= 'A' && letter <= 'Z');

    return is_letter && filename[1] == ':';
}
/* Return 1 if 'filename' is exactly a drive ("d:") or starts with the
   device prefix "\\.\" or "//./"; 0 otherwise. */
static int is_windows_drive(const char *filename)
{
    const int bare_drive = is_windows_drive_prefix(filename) &&
                           filename[2] == '\0';

    if (bare_drive) {
        return 1;
    }
    if (strstart(filename, "\\\\.\\", NULL)) {
        return 1;
    }
    if (strstart(filename, "//./", NULL)) {
        return 1;
    }
    return 0;
}
224 #endif
226 static BlockDriver *find_protocol(const char *filename)
228 BlockDriver *drv1;
229 char protocol[128];
230 int len;
231 const char *p;
233 #ifdef _WIN32
234 if (is_windows_drive(filename) ||
235 is_windows_drive_prefix(filename))
236 return &bdrv_raw;
237 #endif
238 p = strchr(filename, ':');
239 if (!p)
240 return &bdrv_raw;
241 len = p - filename;
242 if (len > sizeof(protocol) - 1)
243 len = sizeof(protocol) - 1;
244 memcpy(protocol, filename, len);
245 protocol[len] = '\0';
246 for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
247 if (drv1->protocol_name &&
248 !strcmp(drv1->protocol_name, protocol))
249 return drv1;
251 return NULL;
254 /* XXX: force raw format if block or character device ? It would
255 simplify the BSD case */
256 static BlockDriver *find_image_format(const char *filename)
258 int ret, score, score_max;
259 BlockDriver *drv1, *drv;
260 uint8_t buf[2048];
261 BlockDriverState *bs;
263 /* detect host devices. By convention, /dev/cdrom[N] is always
264 recognized as a host CDROM */
265 if (strstart(filename, "/dev/cdrom", NULL))
266 return &bdrv_host_device;
267 #ifdef _WIN32
268 if (is_windows_drive(filename))
269 return &bdrv_host_device;
270 #else
272 struct stat st;
273 if (stat(filename, &st) >= 0 &&
274 (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
275 return &bdrv_host_device;
278 #endif
280 drv = find_protocol(filename);
281 /* no need to test disk image formats for vvfat */
282 if (drv == &bdrv_vvfat)
283 return drv;
285 ret = bdrv_file_open(&bs, filename, BDRV_O_RDONLY);
286 if (ret < 0)
287 return NULL;
288 ret = bdrv_pread(bs, 0, buf, sizeof(buf));
289 bdrv_delete(bs);
290 if (ret < 0) {
291 return NULL;
294 score_max = 0;
295 for(drv1 = first_drv; drv1 != NULL; drv1 = drv1->next) {
296 if (drv1->bdrv_probe) {
297 score = drv1->bdrv_probe(buf, ret, filename);
298 if (score > score_max) {
299 score_max = score;
300 drv = drv1;
304 return drv;
307 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
309 BlockDriverState *bs;
310 int ret;
312 bs = bdrv_new("");
313 if (!bs)
314 return -ENOMEM;
315 ret = bdrv_open2(bs, filename, flags | BDRV_O_FILE, NULL);
316 if (ret < 0) {
317 bdrv_delete(bs);
318 return ret;
320 *pbs = bs;
321 return 0;
324 int bdrv_open(BlockDriverState *bs, const char *filename, int flags)
326 return bdrv_open2(bs, filename, flags, NULL);
329 int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
330 BlockDriver *drv)
332 int ret, open_flags;
333 char tmp_filename[PATH_MAX];
334 char backing_filename[PATH_MAX];
336 bs->read_only = 0;
337 bs->is_temporary = 0;
338 bs->encrypted = 0;
340 if (flags & BDRV_O_SNAPSHOT) {
341 BlockDriverState *bs1;
342 int64_t total_size;
344 /* if snapshot, we create a temporary backing file and open it
345 instead of opening 'filename' directly */
347 /* if there is a backing file, use it */
348 bs1 = bdrv_new("");
349 if (!bs1) {
350 return -ENOMEM;
352 if (bdrv_open(bs1, filename, 0) < 0) {
353 bdrv_delete(bs1);
354 return -1;
356 total_size = bdrv_getlength(bs1) >> SECTOR_BITS;
357 bdrv_delete(bs1);
359 get_tmp_filename(tmp_filename, sizeof(tmp_filename));
360 realpath(filename, backing_filename);
361 if (bdrv_create(&bdrv_qcow2, tmp_filename,
362 total_size, backing_filename, 0) < 0) {
363 return -1;
365 filename = tmp_filename;
366 bs->is_temporary = 1;
369 pstrcpy(bs->filename, sizeof(bs->filename), filename);
370 if (flags & BDRV_O_FILE) {
371 drv = find_protocol(filename);
372 if (!drv)
373 return -ENOENT;
374 } else {
375 if (!drv) {
376 drv = find_image_format(filename);
377 if (!drv)
378 return -1;
381 bs->drv = drv;
382 bs->opaque = qemu_mallocz(drv->instance_size);
383 if (bs->opaque == NULL && drv->instance_size > 0)
384 return -1;
385 /* Note: for compatibility, we open disk image files as RDWR, and
386 RDONLY as fallback */
387 if (!(flags & BDRV_O_FILE))
388 open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT);
389 else
390 open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
391 ret = drv->bdrv_open(bs, filename, open_flags);
392 if (ret == -EACCES && !(flags & BDRV_O_FILE)) {
393 ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY);
394 bs->read_only = 1;
396 if (ret < 0) {
397 qemu_free(bs->opaque);
398 bs->opaque = NULL;
399 bs->drv = NULL;
400 return ret;
402 if (drv->bdrv_getlength) {
403 bs->total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
405 #ifndef _WIN32
406 if (bs->is_temporary) {
407 unlink(filename);
409 #endif
410 if (bs->backing_file[0] != '\0') {
411 /* if there is a backing file, use it */
412 bs->backing_hd = bdrv_new("");
413 if (!bs->backing_hd) {
414 fail:
415 bdrv_close(bs);
416 return -ENOMEM;
418 path_combine(backing_filename, sizeof(backing_filename),
419 filename, bs->backing_file);
420 if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0)
421 goto fail;
424 /* call the change callback */
425 bs->media_changed = 1;
426 if (bs->change_cb)
427 bs->change_cb(bs->change_opaque);
429 return 0;
432 void bdrv_close(BlockDriverState *bs)
434 if (bs->drv) {
435 if (bs->backing_hd)
436 bdrv_delete(bs->backing_hd);
437 bs->drv->bdrv_close(bs);
438 qemu_free(bs->opaque);
439 #ifdef _WIN32
440 if (bs->is_temporary) {
441 unlink(bs->filename);
443 #endif
444 bs->opaque = NULL;
445 bs->drv = NULL;
447 /* call the change callback */
448 bs->media_changed = 1;
449 if (bs->change_cb)
450 bs->change_cb(bs->change_opaque);
454 void bdrv_delete(BlockDriverState *bs)
456 BlockDriverState **pbs;
458 pbs = &bdrv_first;
459 while (*pbs != bs && *pbs != NULL)
460 pbs = &(*pbs)->next;
461 if (*pbs == bs)
462 *pbs = bs->next;
464 bdrv_close(bs);
465 qemu_free(bs);
468 /* commit COW file into the raw image */
469 int bdrv_commit(BlockDriverState *bs)
471 BlockDriver *drv = bs->drv;
472 int64_t i, total_sectors;
473 int n, j;
474 unsigned char sector[512];
476 if (!drv)
477 return -ENOMEDIUM;
479 if (bs->read_only) {
480 return -EACCES;
483 if (!bs->backing_hd) {
484 return -ENOTSUP;
487 total_sectors = bdrv_getlength(bs) >> SECTOR_BITS;
488 for (i = 0; i < total_sectors;) {
489 if (drv->bdrv_is_allocated(bs, i, 65536, &n)) {
490 for(j = 0; j < n; j++) {
491 if (bdrv_read(bs, i, sector, 1) != 0) {
492 return -EIO;
495 if (bdrv_write(bs->backing_hd, i, sector, 1) != 0) {
496 return -EIO;
498 i++;
500 } else {
501 i += n;
505 if (drv->bdrv_make_empty)
506 return drv->bdrv_make_empty(bs);
508 return 0;
511 /* return < 0 if error. See bdrv_write() for the return codes */
512 int bdrv_read(BlockDriverState *bs, int64_t sector_num,
513 uint8_t *buf, int nb_sectors)
515 BlockDriver *drv = bs->drv;
517 if (!drv)
518 return -ENOMEDIUM;
520 if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
521 memcpy(buf, bs->boot_sector_data, 512);
522 sector_num++;
523 nb_sectors--;
524 buf += 512;
525 if (nb_sectors == 0)
526 return 0;
528 if (drv->bdrv_pread) {
529 int ret, len;
530 len = nb_sectors * 512;
531 ret = drv->bdrv_pread(bs, sector_num * 512, buf, len);
532 if (ret < 0)
533 return ret;
534 else if (ret != len)
535 return -EINVAL;
536 else {
537 bs->rd_bytes += (unsigned) len;
538 bs->rd_ops ++;
539 return 0;
541 } else {
542 return drv->bdrv_read(bs, sector_num, buf, nb_sectors);
546 /* Return < 0 if error. Important errors are:
547 -EIO generic I/O error (may happen for all errors)
548 -ENOMEDIUM No media inserted.
549 -EINVAL Invalid sector number or nb_sectors
550 -EACCES Trying to write a read-only device
552 int bdrv_write(BlockDriverState *bs, int64_t sector_num,
553 const uint8_t *buf, int nb_sectors)
555 BlockDriver *drv = bs->drv;
556 if (!bs->drv)
557 return -ENOMEDIUM;
558 if (bs->read_only)
559 return -EACCES;
560 if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
561 memcpy(bs->boot_sector_data, buf, 512);
563 if (drv->bdrv_pwrite) {
564 int ret, len;
565 len = nb_sectors * 512;
566 ret = drv->bdrv_pwrite(bs, sector_num * 512, buf, len);
567 if (ret < 0)
568 return ret;
569 else if (ret != len)
570 return -EIO;
571 else {
572 bs->wr_bytes += (unsigned) len;
573 bs->wr_ops ++;
574 return 0;
576 } else {
577 return drv->bdrv_write(bs, sector_num, buf, nb_sectors);
581 static int bdrv_pread_em(BlockDriverState *bs, int64_t offset,
582 uint8_t *buf, int count1)
584 uint8_t tmp_buf[SECTOR_SIZE];
585 int len, nb_sectors, count;
586 int64_t sector_num;
588 count = count1;
589 /* first read to align to sector start */
590 len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
591 if (len > count)
592 len = count;
593 sector_num = offset >> SECTOR_BITS;
594 if (len > 0) {
595 if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
596 return -EIO;
597 memcpy(buf, tmp_buf + (offset & (SECTOR_SIZE - 1)), len);
598 count -= len;
599 if (count == 0)
600 return count1;
601 sector_num++;
602 buf += len;
605 /* read the sectors "in place" */
606 nb_sectors = count >> SECTOR_BITS;
607 if (nb_sectors > 0) {
608 if (bdrv_read(bs, sector_num, buf, nb_sectors) < 0)
609 return -EIO;
610 sector_num += nb_sectors;
611 len = nb_sectors << SECTOR_BITS;
612 buf += len;
613 count -= len;
616 /* add data from the last sector */
617 if (count > 0) {
618 if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
619 return -EIO;
620 memcpy(buf, tmp_buf, count);
622 return count1;
625 static int bdrv_pwrite_em(BlockDriverState *bs, int64_t offset,
626 const uint8_t *buf, int count1)
628 uint8_t tmp_buf[SECTOR_SIZE];
629 int len, nb_sectors, count;
630 int64_t sector_num;
632 count = count1;
633 /* first write to align to sector start */
634 len = (SECTOR_SIZE - offset) & (SECTOR_SIZE - 1);
635 if (len > count)
636 len = count;
637 sector_num = offset >> SECTOR_BITS;
638 if (len > 0) {
639 if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
640 return -EIO;
641 memcpy(tmp_buf + (offset & (SECTOR_SIZE - 1)), buf, len);
642 if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
643 return -EIO;
644 count -= len;
645 if (count == 0)
646 return count1;
647 sector_num++;
648 buf += len;
651 /* write the sectors "in place" */
652 nb_sectors = count >> SECTOR_BITS;
653 if (nb_sectors > 0) {
654 if (bdrv_write(bs, sector_num, buf, nb_sectors) < 0)
655 return -EIO;
656 sector_num += nb_sectors;
657 len = nb_sectors << SECTOR_BITS;
658 buf += len;
659 count -= len;
662 /* add data from the last sector */
663 if (count > 0) {
664 if (bdrv_read(bs, sector_num, tmp_buf, 1) < 0)
665 return -EIO;
666 memcpy(tmp_buf, buf, count);
667 if (bdrv_write(bs, sector_num, tmp_buf, 1) < 0)
668 return -EIO;
670 return count1;
674 * Read with byte offsets (needed only for file protocols)
676 int bdrv_pread(BlockDriverState *bs, int64_t offset,
677 void *buf1, int count1)
679 BlockDriver *drv = bs->drv;
681 if (!drv)
682 return -ENOMEDIUM;
683 if (!drv->bdrv_pread)
684 return bdrv_pread_em(bs, offset, buf1, count1);
685 return drv->bdrv_pread(bs, offset, buf1, count1);
689 * Write with byte offsets (needed only for file protocols)
691 int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
692 const void *buf1, int count1)
694 BlockDriver *drv = bs->drv;
696 if (!drv)
697 return -ENOMEDIUM;
698 if (!drv->bdrv_pwrite)
699 return bdrv_pwrite_em(bs, offset, buf1, count1);
700 return drv->bdrv_pwrite(bs, offset, buf1, count1);
704 * Truncate file to 'offset' bytes (needed only for file protocols)
706 int bdrv_truncate(BlockDriverState *bs, int64_t offset)
708 BlockDriver *drv = bs->drv;
709 if (!drv)
710 return -ENOMEDIUM;
711 if (!drv->bdrv_truncate)
712 return -ENOTSUP;
713 return drv->bdrv_truncate(bs, offset);
717 * Length of a file in bytes. Return < 0 if error or unknown.
719 int64_t bdrv_getlength(BlockDriverState *bs)
721 BlockDriver *drv = bs->drv;
722 if (!drv)
723 return -ENOMEDIUM;
724 if (!drv->bdrv_getlength) {
725 /* legacy mode */
726 return bs->total_sectors * SECTOR_SIZE;
728 return drv->bdrv_getlength(bs);
731 /* return 0 as number of sectors if no device present or error */
732 void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr)
734 int64_t length;
735 length = bdrv_getlength(bs);
736 if (length < 0)
737 length = 0;
738 else
739 length = length >> SECTOR_BITS;
740 *nb_sectors_ptr = length;
743 /* force a given boot sector. */
744 void bdrv_set_boot_sector(BlockDriverState *bs, const uint8_t *data, int size)
746 bs->boot_sector_enabled = 1;
747 if (size > 512)
748 size = 512;
749 memcpy(bs->boot_sector_data, data, size);
750 memset(bs->boot_sector_data + size, 0, 512 - size);
/* One entry of the MSDOS partition table, as read from the boot sector by
   guess_disk_lchs(); packed so the fields match the on-disk bytes. */
struct partition {
    uint8_t boot_ind;       /* 0x80 - active */
    uint8_t head;           /* starting head */
    uint8_t sector;         /* starting sector */
    uint8_t cyl;            /* starting cylinder */
    uint8_t sys_ind;        /* What partition type */
    uint8_t end_head;       /* end head */
    uint8_t end_sector;     /* end sector */
    uint8_t end_cyl;        /* end cylinder */
    uint32_t start_sect;    /* starting sector counting from 0 */
    uint32_t nr_sects;      /* nr of sectors in partition */
} __attribute__((packed));
766 /* try to guess the disk logical geometry from the MSDOS partition table. Return 0 if OK, -1 if could not guess */
767 static int guess_disk_lchs(BlockDriverState *bs,
768 int *pcylinders, int *pheads, int *psectors)
770 uint8_t *buf;
771 int ret, i, heads, sectors, cylinders;
772 struct partition *p;
773 uint32_t nr_sects;
774 int64_t nb_sectors;
776 buf = qemu_memalign(512, 512);
777 if (buf == NULL)
778 return -1;
780 bdrv_get_geometry(bs, &nb_sectors);
782 ret = bdrv_read(bs, 0, buf, 1);
783 if (ret < 0)
784 return -1;
785 /* test msdos magic */
786 if (buf[510] != 0x55 || buf[511] != 0xaa) {
787 qemu_free(buf);
788 return -1;
790 for(i = 0; i < 4; i++) {
791 p = ((struct partition *)(buf + 0x1be)) + i;
792 nr_sects = le32_to_cpu(p->nr_sects);
793 if (nr_sects && p->end_head) {
794 /* We make the assumption that the partition terminates on
795 a cylinder boundary */
796 heads = p->end_head + 1;
797 sectors = p->end_sector & 63;
798 if (sectors == 0)
799 continue;
800 cylinders = nb_sectors / (heads * sectors);
801 if (cylinders < 1 || cylinders > 16383)
802 continue;
803 *pheads = heads;
804 *psectors = sectors;
805 *pcylinders = cylinders;
806 #if 0
807 printf("guessed geometry: LCHS=%d %d %d\n",
808 cylinders, heads, sectors);
809 #endif
810 qemu_free(buf);
811 return 0;
814 qemu_free(buf);
815 return -1;
818 void bdrv_guess_geometry(BlockDriverState *bs, int *pcyls, int *pheads, int *psecs)
820 int translation, lba_detected = 0;
821 int cylinders, heads, secs;
822 int64_t nb_sectors;
824 /* if a geometry hint is available, use it */
825 bdrv_get_geometry(bs, &nb_sectors);
826 bdrv_get_geometry_hint(bs, &cylinders, &heads, &secs);
827 translation = bdrv_get_translation_hint(bs);
828 if (cylinders != 0) {
829 *pcyls = cylinders;
830 *pheads = heads;
831 *psecs = secs;
832 } else {
833 if (guess_disk_lchs(bs, &cylinders, &heads, &secs) == 0) {
834 if (heads > 16) {
835 /* if heads > 16, it means that a BIOS LBA
836 translation was active, so the default
837 hardware geometry is OK */
838 lba_detected = 1;
839 goto default_geometry;
840 } else {
841 *pcyls = cylinders;
842 *pheads = heads;
843 *psecs = secs;
844 /* disable any translation to be in sync with
845 the logical geometry */
846 if (translation == BIOS_ATA_TRANSLATION_AUTO) {
847 bdrv_set_translation_hint(bs,
848 BIOS_ATA_TRANSLATION_NONE);
851 } else {
852 default_geometry:
853 /* if no geometry, use a standard physical disk geometry */
854 cylinders = nb_sectors / (16 * 63);
856 if (cylinders > 16383)
857 cylinders = 16383;
858 else if (cylinders < 2)
859 cylinders = 2;
860 *pcyls = cylinders;
861 *pheads = 16;
862 *psecs = 63;
863 if ((lba_detected == 1) && (translation == BIOS_ATA_TRANSLATION_AUTO)) {
864 if ((*pcyls * *pheads) <= 131072) {
865 bdrv_set_translation_hint(bs,
866 BIOS_ATA_TRANSLATION_LARGE);
867 } else {
868 bdrv_set_translation_hint(bs,
869 BIOS_ATA_TRANSLATION_LBA);
873 bdrv_set_geometry_hint(bs, *pcyls, *pheads, *psecs);
877 void bdrv_set_geometry_hint(BlockDriverState *bs,
878 int cyls, int heads, int secs)
880 bs->cyls = cyls;
881 bs->heads = heads;
882 bs->secs = secs;
885 void bdrv_set_type_hint(BlockDriverState *bs, int type)
887 bs->type = type;
888 bs->removable = ((type == BDRV_TYPE_CDROM ||
889 type == BDRV_TYPE_FLOPPY));
892 void bdrv_set_translation_hint(BlockDriverState *bs, int translation)
894 bs->translation = translation;
897 void bdrv_get_geometry_hint(BlockDriverState *bs,
898 int *pcyls, int *pheads, int *psecs)
900 *pcyls = bs->cyls;
901 *pheads = bs->heads;
902 *psecs = bs->secs;
905 int bdrv_get_type_hint(BlockDriverState *bs)
907 return bs->type;
910 int bdrv_get_translation_hint(BlockDriverState *bs)
912 return bs->translation;
915 int bdrv_is_removable(BlockDriverState *bs)
917 return bs->removable;
920 int bdrv_is_read_only(BlockDriverState *bs)
922 return bs->read_only;
925 int bdrv_is_sg(BlockDriverState *bs)
927 return bs->sg;
930 /* XXX: no longer used */
931 void bdrv_set_change_cb(BlockDriverState *bs,
932 void (*change_cb)(void *opaque), void *opaque)
934 bs->change_cb = change_cb;
935 bs->change_opaque = opaque;
938 int bdrv_is_encrypted(BlockDriverState *bs)
940 if (bs->backing_hd && bs->backing_hd->encrypted)
941 return 1;
942 return bs->encrypted;
945 int bdrv_set_key(BlockDriverState *bs, const char *key)
947 int ret;
948 if (bs->backing_hd && bs->backing_hd->encrypted) {
949 ret = bdrv_set_key(bs->backing_hd, key);
950 if (ret < 0)
951 return ret;
952 if (!bs->encrypted)
953 return 0;
955 if (!bs->encrypted || !bs->drv || !bs->drv->bdrv_set_key)
956 return -1;
957 return bs->drv->bdrv_set_key(bs, key);
960 void bdrv_get_format(BlockDriverState *bs, char *buf, int buf_size)
962 if (!bs->drv) {
963 buf[0] = '\0';
964 } else {
965 pstrcpy(buf, buf_size, bs->drv->format_name);
969 void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
970 void *opaque)
972 BlockDriver *drv;
974 for (drv = first_drv; drv != NULL; drv = drv->next) {
975 it(opaque, drv->format_name);
979 BlockDriverState *bdrv_find(const char *name)
981 BlockDriverState *bs;
983 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
984 if (!strcmp(name, bs->device_name))
985 return bs;
987 return NULL;
990 void bdrv_iterate(void (*it)(void *opaque, const char *name), void *opaque)
992 BlockDriverState *bs;
994 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
995 it(opaque, bs->device_name);
999 const char *bdrv_get_device_name(BlockDriverState *bs)
1001 return bs->device_name;
1004 void bdrv_flush(BlockDriverState *bs)
1006 if (bs->drv->bdrv_flush)
1007 bs->drv->bdrv_flush(bs);
1008 if (bs->backing_hd)
1009 bdrv_flush(bs->backing_hd);
1012 void bdrv_iterate_writeable(void (*it)(BlockDriverState *bs))
1014 BlockDriverState *bs;
1016 for (bs = bdrv_first; bs != NULL; bs = bs->next)
1017 if (bs->drv && !bdrv_is_read_only(bs) &&
1018 (!bdrv_is_removable(bs) || bdrv_is_inserted(bs)))
1019 it(bs);
1022 void bdrv_flush_all(void)
1024 bdrv_iterate_writeable(bdrv_flush);
1027 #ifndef QEMU_IMG
1028 void bdrv_info(void)
1030 BlockDriverState *bs;
1032 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1033 term_printf("%s:", bs->device_name);
1034 term_printf(" type=");
1035 switch(bs->type) {
1036 case BDRV_TYPE_HD:
1037 term_printf("hd");
1038 break;
1039 case BDRV_TYPE_CDROM:
1040 term_printf("cdrom");
1041 break;
1042 case BDRV_TYPE_FLOPPY:
1043 term_printf("floppy");
1044 break;
1046 term_printf(" removable=%d", bs->removable);
1047 if (bs->removable) {
1048 term_printf(" locked=%d", bs->locked);
1050 if (bs->drv) {
1051 term_printf(" file=");
1052 term_print_filename(bs->filename);
1053 if (bs->backing_file[0] != '\0') {
1054 term_printf(" backing_file=");
1055 term_print_filename(bs->backing_file);
1057 term_printf(" ro=%d", bs->read_only);
1058 term_printf(" drv=%s", bs->drv->format_name);
1059 if (bs->encrypted)
1060 term_printf(" encrypted");
1061 } else {
1062 term_printf(" [not inserted]");
1064 term_printf("\n");
1068 /* The "info blockstats" command. */
1069 void bdrv_info_stats (void)
1071 BlockDriverState *bs;
1073 for (bs = bdrv_first; bs != NULL; bs = bs->next) {
1074 term_printf ("%s:"
1075 " rd_bytes=%" PRIu64
1076 " wr_bytes=%" PRIu64
1077 " rd_operations=%" PRIu64
1078 " wr_operations=%" PRIu64
1079 "\n",
1080 bs->device_name,
1081 bs->rd_bytes, bs->wr_bytes,
1082 bs->rd_ops, bs->wr_ops);
1085 #endif
1087 void bdrv_get_backing_filename(BlockDriverState *bs,
1088 char *filename, int filename_size)
1090 if (!bs->backing_hd) {
1091 pstrcpy(filename, filename_size, "");
1092 } else {
1093 pstrcpy(filename, filename_size, bs->backing_file);
1097 int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
1098 const uint8_t *buf, int nb_sectors)
1100 BlockDriver *drv = bs->drv;
1101 if (!drv)
1102 return -ENOMEDIUM;
1103 if (!drv->bdrv_write_compressed)
1104 return -ENOTSUP;
1105 return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
1108 int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1110 BlockDriver *drv = bs->drv;
1111 if (!drv)
1112 return -ENOMEDIUM;
1113 if (!drv->bdrv_get_info)
1114 return -ENOTSUP;
1115 memset(bdi, 0, sizeof(*bdi));
1116 return drv->bdrv_get_info(bs, bdi);
1119 /**************************************************************/
1120 /* handling of snapshots */
1122 int bdrv_snapshot_create(BlockDriverState *bs,
1123 QEMUSnapshotInfo *sn_info)
1125 BlockDriver *drv = bs->drv;
1126 if (!drv)
1127 return -ENOMEDIUM;
1128 if (!drv->bdrv_snapshot_create)
1129 return -ENOTSUP;
1130 return drv->bdrv_snapshot_create(bs, sn_info);
1133 int bdrv_snapshot_goto(BlockDriverState *bs,
1134 const char *snapshot_id)
1136 BlockDriver *drv = bs->drv;
1137 if (!drv)
1138 return -ENOMEDIUM;
1139 if (!drv->bdrv_snapshot_goto)
1140 return -ENOTSUP;
1141 return drv->bdrv_snapshot_goto(bs, snapshot_id);
1144 int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
1146 BlockDriver *drv = bs->drv;
1147 if (!drv)
1148 return -ENOMEDIUM;
1149 if (!drv->bdrv_snapshot_delete)
1150 return -ENOTSUP;
1151 return drv->bdrv_snapshot_delete(bs, snapshot_id);
1154 int bdrv_snapshot_list(BlockDriverState *bs,
1155 QEMUSnapshotInfo **psn_info)
1157 BlockDriver *drv = bs->drv;
1158 if (!drv)
1159 return -ENOMEDIUM;
1160 if (!drv->bdrv_snapshot_list)
1161 return -ENOTSUP;
1162 return drv->bdrv_snapshot_list(bs, psn_info);
#define NB_SUFFIXES 4

/* Format 'size' into 'buf' (buf_size bytes) and return 'buf'.
   Values up to 999 are printed as plain integers.  Larger values are scaled
   by powers of 1024 with suffix K, M, G or T: one decimal when below ten
   units, a rounded integer otherwise.  T is used for everything beyond. */
char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
    static const char suffixes[NB_SUFFIXES] = "KMGT";
    int64_t unit = 1024;
    int idx;

    if (size <= 999) {
        snprintf(buf, buf_size, "%" PRId64, size);
        return buf;
    }

    for (idx = 0; idx < NB_SUFFIXES; idx++, unit *= 1024) {
        if (size < (10 * unit)) {
            snprintf(buf, buf_size, "%0.1f%c",
                     (double)size / unit,
                     suffixes[idx]);
            return buf;
        }
        if (size < (1000 * unit) || idx == (NB_SUFFIXES - 1)) {
            snprintf(buf, buf_size, "%" PRId64 "%c",
                     ((size + (unit >> 1)) / unit),
                     suffixes[idx]);
            return buf;
        }
    }
    return buf;
}
1195 char *bdrv_snapshot_dump(char *buf, int buf_size, QEMUSnapshotInfo *sn)
1197 char buf1[128], date_buf[128], clock_buf[128];
1198 #ifdef _WIN32
1199 struct tm *ptm;
1200 #else
1201 struct tm tm;
1202 #endif
1203 time_t ti;
1204 int64_t secs;
1206 if (!sn) {
1207 snprintf(buf, buf_size,
1208 "%-10s%-20s%7s%20s%15s",
1209 "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
1210 } else {
1211 ti = sn->date_sec;
1212 #ifdef _WIN32
1213 ptm = localtime(&ti);
1214 strftime(date_buf, sizeof(date_buf),
1215 "%Y-%m-%d %H:%M:%S", ptm);
1216 #else
1217 localtime_r(&ti, &tm);
1218 strftime(date_buf, sizeof(date_buf),
1219 "%Y-%m-%d %H:%M:%S", &tm);
1220 #endif
1221 secs = sn->vm_clock_nsec / 1000000000;
1222 snprintf(clock_buf, sizeof(clock_buf),
1223 "%02d:%02d:%02d.%03d",
1224 (int)(secs / 3600),
1225 (int)((secs / 60) % 60),
1226 (int)(secs % 60),
1227 (int)((sn->vm_clock_nsec / 1000000) % 1000));
1228 snprintf(buf, buf_size,
1229 "%-10s%-20s%7s%20s%15s",
1230 sn->id_str, sn->name,
1231 get_human_readable_size(buf1, sizeof(buf1), sn->vm_state_size),
1232 date_buf,
1233 clock_buf);
1235 return buf;
1239 /**************************************************************/
1240 /* async I/Os */
1242 BlockDriverAIOCB *bdrv_aio_read(BlockDriverState *bs, int64_t sector_num,
1243 uint8_t *buf, int nb_sectors,
1244 BlockDriverCompletionFunc *cb, void *opaque)
1246 BlockDriver *drv = bs->drv;
1247 BlockDriverAIOCB *ret;
1249 if (!drv)
1250 return NULL;
1252 /* XXX: we assume that nb_sectors == 0 is suppored by the async read */
1253 if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
1254 memcpy(buf, bs->boot_sector_data, 512);
1255 sector_num++;
1256 nb_sectors--;
1257 buf += 512;
1260 ret = drv->bdrv_aio_read(bs, sector_num, buf, nb_sectors, cb, opaque);
1262 if (ret) {
1263 /* Update stats even though technically transfer has not happened. */
1264 bs->rd_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1265 bs->rd_ops ++;
1268 return ret;
1271 BlockDriverAIOCB *bdrv_aio_write(BlockDriverState *bs, int64_t sector_num,
1272 const uint8_t *buf, int nb_sectors,
1273 BlockDriverCompletionFunc *cb, void *opaque)
1275 BlockDriver *drv = bs->drv;
1276 BlockDriverAIOCB *ret;
1278 if (!drv)
1279 return NULL;
1280 if (bs->read_only)
1281 return NULL;
1282 if (sector_num == 0 && bs->boot_sector_enabled && nb_sectors > 0) {
1283 memcpy(bs->boot_sector_data, buf, 512);
1286 ret = drv->bdrv_aio_write(bs, sector_num, buf, nb_sectors, cb, opaque);
1288 if (ret) {
1289 /* Update stats even though technically transfer has not happened. */
1290 bs->wr_bytes += (unsigned) nb_sectors * SECTOR_SIZE;
1291 bs->wr_ops ++;
1294 return ret;
1297 void bdrv_aio_cancel(BlockDriverAIOCB *acb)
1299 BlockDriver *drv = acb->bs->drv;
1301 drv->bdrv_aio_cancel(acb);
1305 /**************************************************************/
1306 /* async block device emulation */
1308 #ifdef QEMU_IMG
1309 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
1310 int64_t sector_num, uint8_t *buf, int nb_sectors,
1311 BlockDriverCompletionFunc *cb, void *opaque)
1313 int ret;
1314 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1315 cb(opaque, ret);
1316 return NULL;
1319 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
1320 int64_t sector_num, const uint8_t *buf, int nb_sectors,
1321 BlockDriverCompletionFunc *cb, void *opaque)
1323 int ret;
1324 ret = bdrv_write(bs, sector_num, buf, nb_sectors);
1325 cb(opaque, ret);
1326 return NULL;
1329 static void bdrv_aio_cancel_em(BlockDriverAIOCB *acb)
1332 #else
1333 static void bdrv_aio_bh_cb(void *opaque)
1335 BlockDriverAIOCBSync *acb = opaque;
1336 acb->common.cb(acb->common.opaque, acb->ret);
1337 qemu_aio_release(acb);
1340 static BlockDriverAIOCB *bdrv_aio_read_em(BlockDriverState *bs,
1341 int64_t sector_num, uint8_t *buf, int nb_sectors,
1342 BlockDriverCompletionFunc *cb, void *opaque)
1344 BlockDriverAIOCBSync *acb;
1345 int ret;
1347 acb = qemu_aio_get(bs, cb, opaque);
1348 if (!acb->bh)
1349 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1350 ret = bdrv_read(bs, sector_num, buf, nb_sectors);
1351 acb->ret = ret;
1352 qemu_bh_schedule(acb->bh);
1353 return &acb->common;
1356 static BlockDriverAIOCB *bdrv_aio_write_em(BlockDriverState *bs,
1357 int64_t sector_num, const uint8_t *buf, int nb_sectors,
1358 BlockDriverCompletionFunc *cb, void *opaque)
1360 BlockDriverAIOCBSync *acb;
1361 int ret;
1363 acb = qemu_aio_get(bs, cb, opaque);
1364 if (!acb->bh)
1365 acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb);
1366 ret = bdrv_write(bs, sector_num, buf, nb_sectors);
1367 acb->ret = ret;
1368 qemu_bh_schedule(acb->bh);
1369 return &acb->common;
1372 static void bdrv_aio_cancel_em(BlockDriverAIOCB *blockacb)
1374 BlockDriverAIOCBSync *acb = (BlockDriverAIOCBSync *)blockacb;
1375 qemu_bh_cancel(acb->bh);
1376 qemu_aio_release(acb);
1378 #endif /* !QEMU_IMG */
1380 /**************************************************************/
1381 /* sync block device emulation */
/*
 * Completion callback used by the synchronous emulation: stores the
 * request's result into the int that opaque points to.
 */
static void bdrv_rw_em_cb(void *opaque, int ret)
{
    int *result = opaque;

    *result = ret;
}
1388 #define NOT_DONE 0x7fffffff
1390 static int bdrv_read_em(BlockDriverState *bs, int64_t sector_num,
1391 uint8_t *buf, int nb_sectors)
1393 int async_ret;
1394 BlockDriverAIOCB *acb;
1396 async_ret = NOT_DONE;
1397 qemu_aio_wait_start();
1398 acb = bdrv_aio_read(bs, sector_num, buf, nb_sectors,
1399 bdrv_rw_em_cb, &async_ret);
1400 if (acb == NULL) {
1401 qemu_aio_wait_end();
1402 return -1;
1404 while (async_ret == NOT_DONE) {
1405 qemu_aio_wait();
1407 qemu_aio_wait_end();
1408 return async_ret;
1411 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
1412 const uint8_t *buf, int nb_sectors)
1414 int async_ret;
1415 BlockDriverAIOCB *acb;
1417 async_ret = NOT_DONE;
1418 qemu_aio_wait_start();
1419 acb = bdrv_aio_write(bs, sector_num, buf, nb_sectors,
1420 bdrv_rw_em_cb, &async_ret);
1421 if (acb == NULL) {
1422 qemu_aio_wait_end();
1423 return -1;
1425 while (async_ret == NOT_DONE) {
1426 qemu_aio_wait();
1428 qemu_aio_wait_end();
1429 return async_ret;
1432 void bdrv_init(void)
1434 bdrv_register(&bdrv_raw);
1435 bdrv_register(&bdrv_host_device);
1436 #ifndef _WIN32
1437 bdrv_register(&bdrv_cow);
1438 #endif
1439 bdrv_register(&bdrv_qcow);
1440 bdrv_register(&bdrv_vmdk);
1441 bdrv_register(&bdrv_cloop);
1442 bdrv_register(&bdrv_dmg);
1443 bdrv_register(&bdrv_bochs);
1444 bdrv_register(&bdrv_vpc);
1445 bdrv_register(&bdrv_vvfat);
1446 bdrv_register(&bdrv_qcow2);
1447 bdrv_register(&bdrv_parallels);
1450 void *qemu_aio_get(BlockDriverState *bs, BlockDriverCompletionFunc *cb,
1451 void *opaque)
1453 BlockDriver *drv;
1454 BlockDriverAIOCB *acb;
1456 drv = bs->drv;
1457 if (drv->free_aiocb) {
1458 acb = drv->free_aiocb;
1459 drv->free_aiocb = acb->next;
1460 } else {
1461 acb = qemu_mallocz(drv->aiocb_size);
1462 if (!acb)
1463 return NULL;
1465 acb->bs = bs;
1466 acb->cb = cb;
1467 acb->opaque = opaque;
1468 return acb;
1471 void qemu_aio_release(void *p)
1473 BlockDriverAIOCB *acb = p;
1474 BlockDriver *drv = acb->bs->drv;
1475 acb->next = drv->free_aiocb;
1476 drv->free_aiocb = acb;
1479 /**************************************************************/
1480 /* removable device support */
1483 * Return TRUE if the media is present
1485 int bdrv_is_inserted(BlockDriverState *bs)
1487 BlockDriver *drv = bs->drv;
1488 int ret;
1489 if (!drv)
1490 return 0;
1491 if (!drv->bdrv_is_inserted)
1492 return 1;
1493 ret = drv->bdrv_is_inserted(bs);
1494 return ret;
1498 * Return TRUE if the media changed since the last call to this
1499 * function. It is currently only used for floppy disks
1501 int bdrv_media_changed(BlockDriverState *bs)
1503 BlockDriver *drv = bs->drv;
1504 int ret;
1506 if (!drv || !drv->bdrv_media_changed)
1507 ret = -ENOTSUP;
1508 else
1509 ret = drv->bdrv_media_changed(bs);
1510 if (ret == -ENOTSUP)
1511 ret = bs->media_changed;
1512 bs->media_changed = 0;
1513 return ret;
1517 * If eject_flag is TRUE, eject the media. Otherwise, close the tray
1519 void bdrv_eject(BlockDriverState *bs, int eject_flag)
1521 BlockDriver *drv = bs->drv;
1522 int ret;
1524 if (!drv || !drv->bdrv_eject) {
1525 ret = -ENOTSUP;
1526 } else {
1527 ret = drv->bdrv_eject(bs, eject_flag);
1529 if (ret == -ENOTSUP) {
1530 if (eject_flag)
1531 bdrv_close(bs);
1535 int bdrv_is_locked(BlockDriverState *bs)
1537 return bs->locked;
1541 * Lock or unlock the media (if it is locked, the user won't be able
1542 * to eject it manually).
1544 void bdrv_set_locked(BlockDriverState *bs, int locked)
1546 BlockDriver *drv = bs->drv;
1548 bs->locked = locked;
1549 if (drv && drv->bdrv_set_locked) {
1550 drv->bdrv_set_locked(bs, locked);
1554 /* needed for generic scsi interface */
1556 int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1558 BlockDriver *drv = bs->drv;
1560 if (drv && drv->bdrv_ioctl)
1561 return drv->bdrv_ioctl(bs, req, buf);
1562 return -ENOTSUP;