Merge commit 'v0.11.0-rc1' into stable-0.11
[qemu-kvm/fedora.git] / block / raw-posix.c
blob74821506a97fd6377963dcd46a46684d042348e6
1 /*
2 * Block driver for RAW files (posix)
4 * Copyright (c) 2006 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "qemu-common.h"
25 #include "qemu-timer.h"
26 #include "qemu-char.h"
27 #include "qemu-log.h"
28 #include "block_int.h"
29 #include "module.h"
30 #include "compatfd.h"
31 #include <assert.h>
32 #ifdef CONFIG_AIO
33 #include "posix-aio-compat.h"
34 #endif
36 #ifdef CONFIG_COCOA
37 #include <paths.h>
38 #include <sys/param.h>
39 #include <IOKit/IOKitLib.h>
40 #include <IOKit/IOBSD.h>
41 #include <IOKit/storage/IOMediaBSDClient.h>
42 #include <IOKit/storage/IOMedia.h>
43 #include <IOKit/storage/IOCDMedia.h>
44 //#include <IOKit/storage/IOCDTypes.h>
45 #include <CoreFoundation/CoreFoundation.h>
46 #endif
48 #ifdef __sun__
49 #define _POSIX_PTHREAD_SEMANTICS 1
50 #include <signal.h>
51 #include <sys/dkio.h>
52 #endif
53 #ifdef __linux__
54 #include <sys/ioctl.h>
55 #include <linux/cdrom.h>
56 #include <linux/fd.h>
57 #endif
58 #ifdef __FreeBSD__
59 #include <signal.h>
60 #include <sys/disk.h>
61 #include <sys/cdio.h>
62 #endif
64 #ifdef __OpenBSD__
65 #include <sys/ioctl.h>
66 #include <sys/disklabel.h>
67 #include <sys/dkio.h>
68 #endif
70 #ifdef __DragonFly__
71 #include <sys/ioctl.h>
72 #include <sys/diskslice.h>
73 #endif
/* Uncomment to make the Linux floppy code print open/close events. */
//#define DEBUG_FLOPPY

/* Uncomment to send block I/O failure diagnostics to the QEMU log. */
//#define DEBUG_BLOCK
#if defined(DEBUG_BLOCK)
/*
 * printf-style diagnostic. The message is emitted and flushed only when
 * logging is enabled.
 */
#define DEBUG_BLOCK_PRINT(fmt, ...)             \
    do {                                        \
        if (qemu_log_enabled()) {               \
            qemu_log(fmt, ## __VA_ARGS__);      \
            qemu_log_flush();                   \
        }                                       \
    } while (0)
#else
/* Diagnostics disabled: the call and all of its arguments compile away. */
#define DEBUG_BLOCK_PRINT(fmt, ...)
#endif
/*
 * Not every host defines O_DSYNC (OS X does not): fall back to O_SYNC,
 * then to O_FSYNC, whichever the host provides.
 */
#if !defined(O_DSYNC)
# if defined(O_SYNC)
#  define O_DSYNC O_SYNC
# elif defined(O_FSYNC)
#  define O_DSYNC O_FSYNC
# endif
#endif

/* Hosts without O_DIRECT get O_DSYNC as an approximation. */
#if !defined(O_DIRECT)
# define O_DIRECT O_DSYNC
#endif
/* Values stored in BDRVRawState.type. */
#define FTYPE_FILE 0
#define FTYPE_CD   1
#define FTYPE_FD   2

/* Size in bytes of the bounce buffer used for unaligned O_DIRECT requests. */
#define ALIGNED_BUFFER_SIZE (32 * 512)

/*
 * If the floppy descriptor has not been accessed for this long (in ms),
 * it is reopened to find out whether the disk has been changed.
 */
#define FD_OPEN_TIMEOUT 1000
/* Per-image state shared by the raw file and host device drivers. */
typedef struct BDRVRawState {
    int fd;                     /* host descriptor, -1 while closed */
    int type;                   /* one of the FTYPE_* values */
    unsigned int lseek_err_cnt; /* consecutive lseek() failures */
    int open_flags;             /* flags handed to open() */
#if defined(__linux__)
    /* Linux floppy bookkeeping, maintained by fd_open() */
    int64_t fd_open_time;       /* clock value of the last successful access */
    int64_t fd_error_time;      /* clock value of the last failed open */
    int fd_got_error;           /* set while the last open attempt failed */
    int fd_media_changed;       /* latched until floppy_media_changed() */
#endif
    uint8_t *aligned_buf;       /* bounce buffer, non-NULL only with BDRV_O_NOCACHE */
} BDRVRawState;
124 static int posix_aio_init(void);
126 static int fd_open(BlockDriverState *bs);
127 static int64_t raw_getlength(BlockDriverState *bs);
129 #if defined(__FreeBSD__)
130 static int cdrom_reopen(BlockDriverState *bs);
131 #endif
133 static int raw_open_common(BlockDriverState *bs, const char *filename,
134 int bdrv_flags, int open_flags)
136 BDRVRawState *s = bs->opaque;
137 int fd, ret;
139 posix_aio_init();
141 s->lseek_err_cnt = 0;
143 s->open_flags = open_flags | O_BINARY;
144 s->open_flags &= ~O_ACCMODE;
145 if ((bdrv_flags & BDRV_O_ACCESS) == BDRV_O_RDWR) {
146 s->open_flags |= O_RDWR;
147 } else {
148 s->open_flags |= O_RDONLY;
149 bs->read_only = 1;
152 /* Use O_DSYNC for write-through caching, no flags for write-back caching,
153 * and O_DIRECT for no caching. */
154 if ((bdrv_flags & BDRV_O_NOCACHE))
155 s->open_flags |= O_DIRECT;
156 else if (!(bdrv_flags & BDRV_O_CACHE_WB))
157 s->open_flags |= O_DSYNC;
159 s->fd = -1;
160 fd = open(filename, s->open_flags, 0644);
161 if (fd < 0) {
162 ret = -errno;
163 if (ret == -EROFS)
164 ret = -EACCES;
165 return ret;
167 s->fd = fd;
168 s->aligned_buf = NULL;
169 if ((bdrv_flags & BDRV_O_NOCACHE)) {
170 s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
171 if (s->aligned_buf == NULL) {
172 ret = -errno;
173 close(fd);
174 return ret;
177 return 0;
180 static int raw_open(BlockDriverState *bs, const char *filename, int flags)
182 BDRVRawState *s = bs->opaque;
183 int open_flags = 0;
185 s->type = FTYPE_FILE;
186 if (flags & BDRV_O_CREAT)
187 open_flags = O_CREAT | O_TRUNC;
189 return raw_open_common(bs, filename, flags, open_flags);
192 /* XXX: use host sector size if necessary with:
193 #ifdef DIOCGSECTORSIZE
195 unsigned int sectorsize = 512;
196 if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
197 sectorsize > bufsize)
198 bufsize = sectorsize;
200 #endif
201 #ifdef CONFIG_COCOA
202 u_int32_t blockSize = 512;
203 if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
204 bufsize = blockSize;
206 #endif
210 * offset and count are in bytes, but must be multiples of 512 for files
211 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
213 * This function may be called without alignment if the caller ensures
214 * that O_DIRECT is not in effect.
216 static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
217 uint8_t *buf, int count)
219 BDRVRawState *s = bs->opaque;
220 int ret;
222 ret = fd_open(bs);
223 if (ret < 0)
224 return ret;
226 if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
227 ++(s->lseek_err_cnt);
228 if(s->lseek_err_cnt <= 10) {
229 DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
230 "] lseek failed : %d = %s\n",
231 s->fd, bs->filename, offset, buf, count,
232 bs->total_sectors, errno, strerror(errno));
234 return -1;
236 s->lseek_err_cnt=0;
238 ret = read(s->fd, buf, count);
239 if (ret == count)
240 goto label__raw_read__success;
242 /* Allow reads beyond the end (needed for pwrite) */
243 if ((ret == 0) && bs->growable) {
244 int64_t size = raw_getlength(bs);
245 if (offset >= size) {
246 memset(buf, 0, count);
247 ret = count;
248 goto label__raw_read__success;
252 DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
253 "] read failed %d : %d = %s\n",
254 s->fd, bs->filename, offset, buf, count,
255 bs->total_sectors, ret, errno, strerror(errno));
257 /* Try harder for CDrom. */
258 if (bs->type == BDRV_TYPE_CDROM) {
259 lseek(s->fd, offset, SEEK_SET);
260 ret = read(s->fd, buf, count);
261 if (ret == count)
262 goto label__raw_read__success;
263 lseek(s->fd, offset, SEEK_SET);
264 ret = read(s->fd, buf, count);
265 if (ret == count)
266 goto label__raw_read__success;
268 DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
269 "] retry read failed %d : %d = %s\n",
270 s->fd, bs->filename, offset, buf, count,
271 bs->total_sectors, ret, errno, strerror(errno));
274 label__raw_read__success:
276 return (ret < 0) ? -errno : ret;
280 * offset and count are in bytes, but must be multiples of 512 for files
281 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
283 * This function may be called without alignment if the caller ensures
284 * that O_DIRECT is not in effect.
286 static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
287 const uint8_t *buf, int count)
289 BDRVRawState *s = bs->opaque;
290 int ret;
292 ret = fd_open(bs);
293 if (ret < 0)
294 return -errno;
296 if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
297 ++(s->lseek_err_cnt);
298 if(s->lseek_err_cnt) {
299 DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
300 PRId64 "] lseek failed : %d = %s\n",
301 s->fd, bs->filename, offset, buf, count,
302 bs->total_sectors, errno, strerror(errno));
304 return -EIO;
306 s->lseek_err_cnt = 0;
308 ret = write(s->fd, buf, count);
309 if (ret == count)
310 goto label__raw_write__success;
312 DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
313 "] write failed %d : %d = %s\n",
314 s->fd, bs->filename, offset, buf, count,
315 bs->total_sectors, ret, errno, strerror(errno));
317 label__raw_write__success:
319 return (ret < 0) ? -errno : ret;
324 * offset and count are in bytes and possibly not aligned. For files opened
325 * with O_DIRECT, necessary alignments are ensured before calling
326 * raw_pread_aligned to do the actual read.
328 static int raw_pread(BlockDriverState *bs, int64_t offset,
329 uint8_t *buf, int count)
331 BDRVRawState *s = bs->opaque;
332 int size, ret, shift, sum;
334 sum = 0;
336 if (s->aligned_buf != NULL) {
338 if (offset & 0x1ff) {
339 /* align offset on a 512 bytes boundary */
341 shift = offset & 0x1ff;
342 size = (shift + count + 0x1ff) & ~0x1ff;
343 if (size > ALIGNED_BUFFER_SIZE)
344 size = ALIGNED_BUFFER_SIZE;
345 ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
346 if (ret < 0)
347 return ret;
349 size = 512 - shift;
350 if (size > count)
351 size = count;
352 memcpy(buf, s->aligned_buf + shift, size);
354 buf += size;
355 offset += size;
356 count -= size;
357 sum += size;
359 if (count == 0)
360 return sum;
362 if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
364 /* read on aligned buffer */
366 while (count) {
368 size = (count + 0x1ff) & ~0x1ff;
369 if (size > ALIGNED_BUFFER_SIZE)
370 size = ALIGNED_BUFFER_SIZE;
372 ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
373 if (ret < 0)
374 return ret;
376 size = ret;
377 if (size > count)
378 size = count;
380 memcpy(buf, s->aligned_buf, size);
382 buf += size;
383 offset += size;
384 count -= size;
385 sum += size;
388 return sum;
392 return raw_pread_aligned(bs, offset, buf, count) + sum;
395 static int raw_read(BlockDriverState *bs, int64_t sector_num,
396 uint8_t *buf, int nb_sectors)
398 int ret;
400 ret = raw_pread(bs, sector_num * 512, buf, nb_sectors * 512);
401 if (ret == (nb_sectors * 512))
402 ret = 0;
403 return ret;
407 * offset and count are in bytes and possibly not aligned. For files opened
408 * with O_DIRECT, necessary alignments are ensured before calling
409 * raw_pwrite_aligned to do the actual write.
411 static int raw_pwrite(BlockDriverState *bs, int64_t offset,
412 const uint8_t *buf, int count)
414 BDRVRawState *s = bs->opaque;
415 int size, ret, shift, sum;
417 sum = 0;
419 if (s->aligned_buf != NULL) {
421 if (offset & 0x1ff) {
422 /* align offset on a 512 bytes boundary */
423 shift = offset & 0x1ff;
424 ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
425 if (ret < 0)
426 return ret;
428 size = 512 - shift;
429 if (size > count)
430 size = count;
431 memcpy(s->aligned_buf + shift, buf, size);
433 ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
434 if (ret < 0)
435 return ret;
437 buf += size;
438 offset += size;
439 count -= size;
440 sum += size;
442 if (count == 0)
443 return sum;
445 if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
447 while ((size = (count & ~0x1ff)) != 0) {
449 if (size > ALIGNED_BUFFER_SIZE)
450 size = ALIGNED_BUFFER_SIZE;
452 memcpy(s->aligned_buf, buf, size);
454 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
455 if (ret < 0)
456 return ret;
458 buf += ret;
459 offset += ret;
460 count -= ret;
461 sum += ret;
463 /* here, count < 512 because (count & ~0x1ff) == 0 */
464 if (count) {
465 ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
466 if (ret < 0)
467 return ret;
468 memcpy(s->aligned_buf, buf, count);
470 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
471 if (ret < 0)
472 return ret;
473 if (count < ret)
474 ret = count;
476 sum += ret;
478 return sum;
481 return raw_pwrite_aligned(bs, offset, buf, count) + sum;
484 static int raw_write(BlockDriverState *bs, int64_t sector_num,
485 const uint8_t *buf, int nb_sectors)
487 int ret;
488 ret = raw_pwrite(bs, sector_num * 512, buf, nb_sectors * 512);
489 if (ret == (nb_sectors * 512))
490 ret = 0;
491 return ret;
494 #ifdef CONFIG_AIO
495 /***********************************************************/
496 /* Unix AIO using POSIX AIO */
498 typedef struct RawAIOCB {
499 BlockDriverAIOCB common;
500 struct qemu_paiocb aiocb;
501 struct RawAIOCB *next;
502 int ret;
503 } RawAIOCB;
505 typedef struct PosixAioState
507 int fd;
508 RawAIOCB *first_aio;
509 } PosixAioState;
511 static void posix_aio_read(void *opaque)
513 PosixAioState *s = opaque;
514 RawAIOCB *acb, **pacb;
515 int ret;
516 size_t offset;
517 union {
518 struct qemu_signalfd_siginfo siginfo;
519 char buf[128];
520 } sig;
522 /* try to read from signalfd, don't freak out if we can't read anything */
523 offset = 0;
524 while (offset < 128) {
525 ssize_t len;
527 len = read(s->fd, sig.buf + offset, 128 - offset);
528 if (len == -1 && errno == EINTR)
529 continue;
530 if (len == -1 && errno == EAGAIN) {
531 /* there is no natural reason for this to happen,
532 * so we'll spin hard until we get everything just
533 * to be on the safe side. */
534 if (offset > 0)
535 continue;
538 offset += len;
541 for(;;) {
542 pacb = &s->first_aio;
543 for(;;) {
544 acb = *pacb;
545 if (!acb)
546 goto the_end;
547 ret = qemu_paio_error(&acb->aiocb);
548 if (ret == ECANCELED) {
549 /* remove the request */
550 *pacb = acb->next;
551 qemu_aio_release(acb);
552 } else if (ret != EINPROGRESS) {
553 /* end of aio */
554 if (ret == 0) {
555 ret = qemu_paio_return(&acb->aiocb);
556 if (ret == acb->aiocb.aio_nbytes)
557 ret = 0;
558 else
559 ret = -EINVAL;
560 } else {
561 ret = -ret;
563 /* remove the request */
564 *pacb = acb->next;
565 /* call the callback */
566 acb->common.cb(acb->common.opaque, ret);
567 qemu_aio_release(acb);
568 break;
569 } else {
570 pacb = &acb->next;
574 the_end: ;
577 static int posix_aio_flush(void *opaque)
579 PosixAioState *s = opaque;
580 return !!s->first_aio;
583 static PosixAioState *posix_aio_state;
585 static int posix_aio_init(void)
587 sigset_t mask;
588 PosixAioState *s;
589 struct qemu_paioinit ai;
591 if (posix_aio_state)
592 return 0;
594 s = qemu_malloc(sizeof(PosixAioState));
596 /* Make sure to block AIO signal */
597 sigemptyset(&mask);
598 sigaddset(&mask, SIGUSR2);
599 sigprocmask(SIG_BLOCK, &mask, NULL);
601 s->first_aio = NULL;
602 s->fd = qemu_signalfd(&mask);
603 if (s->fd == -1) {
604 fprintf(stderr, "failed to create signalfd\n");
605 return -errno;
608 fcntl(s->fd, F_SETFL, O_NONBLOCK);
610 qemu_aio_set_fd_handler(s->fd, posix_aio_read, NULL, posix_aio_flush, s);
612 memset(&ai, 0, sizeof(ai));
613 ai.aio_threads = 64;
614 ai.aio_num = 64;
615 qemu_paio_init(&ai);
617 posix_aio_state = s;
619 return 0;
622 static void raw_aio_remove(RawAIOCB *acb)
624 RawAIOCB **pacb;
626 /* remove the callback from the queue */
627 pacb = &posix_aio_state->first_aio;
628 for(;;) {
629 if (*pacb == NULL) {
630 fprintf(stderr, "raw_aio_remove: aio request not found!\n");
631 break;
632 } else if (*pacb == acb) {
633 *pacb = acb->next;
634 qemu_aio_release(acb);
635 break;
637 pacb = &(*pacb)->next;
641 static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
643 int ret;
644 RawAIOCB *acb = (RawAIOCB *)blockacb;
646 ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
647 if (ret == QEMU_PAIO_NOTCANCELED) {
648 /* fail safe: if the aio could not be canceled, we wait for
649 it */
650 while (qemu_paio_error(&acb->aiocb) == EINPROGRESS);
653 raw_aio_remove(acb);
656 static AIOPool raw_aio_pool = {
657 .aiocb_size = sizeof(RawAIOCB),
658 .cancel = raw_aio_cancel,
661 static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
662 QEMUIOVector *qiov, int nb_sectors,
663 BlockDriverCompletionFunc *cb, void *opaque)
665 BDRVRawState *s = bs->opaque;
666 RawAIOCB *acb;
668 if (fd_open(bs) < 0)
669 return NULL;
671 acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
672 if (!acb)
673 return NULL;
674 acb->aiocb.aio_fildes = s->fd;
675 acb->aiocb.ev_signo = SIGUSR2;
676 acb->aiocb.aio_iov = qiov->iov;
677 acb->aiocb.aio_niov = qiov->niov;
678 acb->aiocb.aio_nbytes = nb_sectors * 512;
679 acb->aiocb.aio_offset = sector_num * 512;
680 acb->aiocb.aio_flags = 0;
683 * If O_DIRECT is used the buffer needs to be aligned on a sector
684 * boundary. Tell the low level code to ensure that in case it's
685 * not done yet.
687 if (s->aligned_buf)
688 acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;
690 acb->next = posix_aio_state->first_aio;
691 posix_aio_state->first_aio = acb;
692 return acb;
695 static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
696 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
697 BlockDriverCompletionFunc *cb, void *opaque)
699 RawAIOCB *acb;
701 acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
702 if (!acb)
703 return NULL;
704 if (qemu_paio_read(&acb->aiocb) < 0) {
705 raw_aio_remove(acb);
706 return NULL;
708 return &acb->common;
711 static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
712 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
713 BlockDriverCompletionFunc *cb, void *opaque)
715 RawAIOCB *acb;
717 acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
718 if (!acb)
719 return NULL;
720 if (qemu_paio_write(&acb->aiocb) < 0) {
721 raw_aio_remove(acb);
722 return NULL;
724 return &acb->common;
726 #else /* CONFIG_AIO */
/* Built without CONFIG_AIO: there is nothing to set up, always succeeds. */
static int posix_aio_init(void)
{
    return 0;
}
731 #endif /* CONFIG_AIO */
734 static void raw_close(BlockDriverState *bs)
736 BDRVRawState *s = bs->opaque;
737 if (s->fd >= 0) {
738 close(s->fd);
739 s->fd = -1;
740 if (s->aligned_buf != NULL)
741 qemu_free(s->aligned_buf);
745 static int raw_truncate(BlockDriverState *bs, int64_t offset)
747 BDRVRawState *s = bs->opaque;
748 if (s->type != FTYPE_FILE)
749 return -ENOTSUP;
750 if (ftruncate(s->fd, offset) < 0)
751 return -errno;
752 return 0;
755 #ifdef __OpenBSD__
756 static int64_t raw_getlength(BlockDriverState *bs)
758 BDRVRawState *s = bs->opaque;
759 int fd = s->fd;
760 struct stat st;
762 if (fstat(fd, &st))
763 return -1;
764 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
765 struct disklabel dl;
767 if (ioctl(fd, DIOCGDINFO, &dl))
768 return -1;
769 return (uint64_t)dl.d_secsize *
770 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
771 } else
772 return st.st_size;
774 #else /* !__OpenBSD__ */
775 static int64_t raw_getlength(BlockDriverState *bs)
777 BDRVRawState *s = bs->opaque;
778 int fd = s->fd;
779 int64_t size;
780 #ifdef HOST_BSD
781 struct stat sb;
782 #ifdef __FreeBSD__
783 int reopened = 0;
784 #endif
785 #endif
786 #ifdef __sun__
787 struct dk_minfo minfo;
788 int rv;
789 #endif
790 int ret;
792 ret = fd_open(bs);
793 if (ret < 0)
794 return ret;
796 #ifdef HOST_BSD
797 #ifdef __FreeBSD__
798 again:
799 #endif
800 if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
801 #ifdef DIOCGMEDIASIZE
802 if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
803 #elif defined(DIOCGPART)
805 struct partinfo pi;
806 if (ioctl(fd, DIOCGPART, &pi) == 0)
807 size = pi.media_size;
808 else
809 size = 0;
811 if (size == 0)
812 #endif
813 #ifdef CONFIG_COCOA
814 size = LONG_LONG_MAX;
815 #else
816 size = lseek(fd, 0LL, SEEK_END);
817 #endif
818 #ifdef __FreeBSD__
819 switch(s->type) {
820 case FTYPE_CD:
821 /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
822 if (size == 2048LL * (unsigned)-1)
823 size = 0;
824 /* XXX no disc? maybe we need to reopen... */
825 if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
826 reopened = 1;
827 goto again;
830 #endif
831 } else
832 #endif
833 #ifdef __sun__
835 * use the DKIOCGMEDIAINFO ioctl to read the size.
837 rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
838 if ( rv != -1 ) {
839 size = minfo.dki_lbsize * minfo.dki_capacity;
840 } else /* there are reports that lseek on some devices
841 fails, but irc discussion said that contingency
842 on contingency was overkill */
843 #endif
845 size = lseek(fd, 0, SEEK_END);
847 return size;
849 #endif
851 static int raw_create(const char *filename, QEMUOptionParameter *options)
853 int fd;
854 int result = 0;
855 int64_t total_size = 0;
857 /* Read out options */
858 while (options && options->name) {
859 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
860 total_size = options->value.n / 512;
862 options++;
865 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
866 0644);
867 if (fd < 0) {
868 result = -errno;
869 } else {
870 if (ftruncate(fd, total_size * 512) != 0) {
871 result = -errno;
873 if (close(fd) != 0) {
874 result = -errno;
877 return result;
880 static void raw_flush(BlockDriverState *bs)
882 BDRVRawState *s = bs->opaque;
883 fsync(s->fd);
887 static QEMUOptionParameter raw_create_options[] = {
889 .name = BLOCK_OPT_SIZE,
890 .type = OPT_SIZE,
891 .help = "Virtual disk size"
893 { NULL }
896 static BlockDriver bdrv_raw = {
897 .format_name = "raw",
898 .instance_size = sizeof(BDRVRawState),
899 .bdrv_probe = NULL, /* no probe for protocols */
900 .bdrv_open = raw_open,
901 .bdrv_read = raw_read,
902 .bdrv_write = raw_write,
903 .bdrv_close = raw_close,
904 .bdrv_create = raw_create,
905 .bdrv_flush = raw_flush,
907 #ifdef CONFIG_AIO
908 .bdrv_aio_readv = raw_aio_readv,
909 .bdrv_aio_writev = raw_aio_writev,
910 #endif
912 .bdrv_truncate = raw_truncate,
913 .bdrv_getlength = raw_getlength,
915 .create_options = raw_create_options,
918 /***********************************************/
919 /* host device */
921 #ifdef CONFIG_COCOA
922 static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
923 static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
925 kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
927 kern_return_t kernResult;
928 mach_port_t masterPort;
929 CFMutableDictionaryRef classesToMatch;
931 kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
932 if ( KERN_SUCCESS != kernResult ) {
933 printf( "IOMasterPort returned %d\n", kernResult );
936 classesToMatch = IOServiceMatching( kIOCDMediaClass );
937 if ( classesToMatch == NULL ) {
938 printf( "IOServiceMatching returned a NULL dictionary.\n" );
939 } else {
940 CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
942 kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
943 if ( KERN_SUCCESS != kernResult )
945 printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
948 return kernResult;
951 kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
953 io_object_t nextMedia;
954 kern_return_t kernResult = KERN_FAILURE;
955 *bsdPath = '\0';
956 nextMedia = IOIteratorNext( mediaIterator );
957 if ( nextMedia )
959 CFTypeRef bsdPathAsCFString;
960 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
961 if ( bsdPathAsCFString ) {
962 size_t devPathLength;
963 strcpy( bsdPath, _PATH_DEV );
964 strcat( bsdPath, "r" );
965 devPathLength = strlen( bsdPath );
966 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
967 kernResult = KERN_SUCCESS;
969 CFRelease( bsdPathAsCFString );
971 IOObjectRelease( nextMedia );
974 return kernResult;
977 #endif
/*
 * Probe score for the generic host device driver: 50 for names starting
 * with "/dev/cdrom", 100 for any other character or block device, 0
 * otherwise.
 */
static int hdev_probe_device(const char *filename)
{
    struct stat info;

    /* allow a dedicated CD-ROM driver to match with a higher priority */
    if (strstart(filename, "/dev/cdrom", NULL)) {
        return 50;
    }

    if (stat(filename, &info) < 0) {
        return 0;
    }

    return (S_ISCHR(info.st_mode) || S_ISBLK(info.st_mode)) ? 100 : 0;
}
995 static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
997 BDRVRawState *s = bs->opaque;
999 #ifdef CONFIG_COCOA
1000 if (strstart(filename, "/dev/cdrom", NULL)) {
1001 kern_return_t kernResult;
1002 io_iterator_t mediaIterator;
1003 char bsdPath[ MAXPATHLEN ];
1004 int fd;
1006 kernResult = FindEjectableCDMedia( &mediaIterator );
1007 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
1009 if ( bsdPath[ 0 ] != '\0' ) {
1010 strcat(bsdPath,"s0");
1011 /* some CDs don't have a partition 0 */
1012 fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
1013 if (fd < 0) {
1014 bsdPath[strlen(bsdPath)-1] = '1';
1015 } else {
1016 close(fd);
1018 filename = bsdPath;
1021 if ( mediaIterator )
1022 IOObjectRelease( mediaIterator );
1024 #endif
1026 s->type = FTYPE_FILE;
1027 #if defined(__linux__) && defined(CONFIG_AIO)
1028 if (strstart(filename, "/dev/sg", NULL)) {
1029 bs->sg = 1;
1031 #endif
1033 return raw_open_common(bs, filename, flags, 0);
1036 #if defined(__linux__)
1037 /* Note: we do not have a reliable method to detect if the floppy is
1038 present. The current method is to try to open the floppy at every
1039 I/O and to keep it opened during a few hundreds of ms. */
1040 static int fd_open(BlockDriverState *bs)
1042 BDRVRawState *s = bs->opaque;
1043 int last_media_present;
1045 if (s->type != FTYPE_FD)
1046 return 0;
1047 last_media_present = (s->fd >= 0);
1048 if (s->fd >= 0 &&
1049 (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
1050 close(s->fd);
1051 s->fd = -1;
1052 #ifdef DEBUG_FLOPPY
1053 printf("Floppy closed\n");
1054 #endif
1056 if (s->fd < 0) {
1057 if (s->fd_got_error &&
1058 (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
1059 #ifdef DEBUG_FLOPPY
1060 printf("No floppy (open delayed)\n");
1061 #endif
1062 return -EIO;
1064 s->fd = open(bs->filename, s->open_flags & ~O_NONBLOCK);
1065 if (s->fd < 0) {
1066 s->fd_error_time = qemu_get_clock(rt_clock);
1067 s->fd_got_error = 1;
1068 if (last_media_present)
1069 s->fd_media_changed = 1;
1070 #ifdef DEBUG_FLOPPY
1071 printf("No floppy\n");
1072 #endif
1073 return -EIO;
1075 #ifdef DEBUG_FLOPPY
1076 printf("Floppy opened\n");
1077 #endif
1079 if (!last_media_present)
1080 s->fd_media_changed = 1;
1081 s->fd_open_time = qemu_get_clock(rt_clock);
1082 s->fd_got_error = 0;
1083 return 0;
1086 static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1088 BDRVRawState *s = bs->opaque;
1090 return ioctl(s->fd, req, buf);
1093 #ifdef CONFIG_AIO
1094 static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
1095 unsigned long int req, void *buf,
1096 BlockDriverCompletionFunc *cb, void *opaque)
1098 BDRVRawState *s = bs->opaque;
1099 RawAIOCB *acb;
1101 if (fd_open(bs) < 0)
1102 return NULL;
1104 acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
1105 if (!acb)
1106 return NULL;
1107 acb->aiocb.aio_fildes = s->fd;
1108 acb->aiocb.ev_signo = SIGUSR2;
1109 acb->aiocb.aio_offset = 0;
1110 acb->aiocb.aio_flags = 0;
1112 acb->next = posix_aio_state->first_aio;
1113 posix_aio_state->first_aio = acb;
1115 acb->aiocb.aio_ioctl_buf = buf;
1116 acb->aiocb.aio_ioctl_cmd = req;
1117 if (qemu_paio_ioctl(&acb->aiocb) < 0) {
1118 raw_aio_remove(acb);
1119 return NULL;
1122 return &acb->common;
1124 #endif
1126 #elif defined(__FreeBSD__)
1127 static int fd_open(BlockDriverState *bs)
1129 BDRVRawState *s = bs->opaque;
1131 /* this is just to ensure s->fd is sane (its called by io ops) */
1132 if (s->fd >= 0)
1133 return 0;
1134 return -EIO;
1136 #else /* !linux && !FreeBSD */
1138 static int fd_open(BlockDriverState *bs)
1140 return 0;
1143 #endif /* !linux && !FreeBSD */
1145 static int hdev_create(const char *filename, QEMUOptionParameter *options)
1147 int fd;
1148 int ret = 0;
1149 struct stat stat_buf;
1150 int64_t total_size = 0;
1152 /* Read out options */
1153 while (options && options->name) {
1154 if (!strcmp(options->name, "size")) {
1155 total_size = options->value.n / 512;
1157 options++;
1160 fd = open(filename, O_WRONLY | O_BINARY);
1161 if (fd < 0)
1162 return -EIO;
1164 if (fstat(fd, &stat_buf) < 0)
1165 ret = -EIO;
1166 else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode))
1167 ret = -EIO;
1168 else if (lseek(fd, 0, SEEK_END) < total_size * 512)
1169 ret = -ENOSPC;
1171 close(fd);
1172 return ret;
1175 static BlockDriver bdrv_host_device = {
1176 .format_name = "host_device",
1177 .instance_size = sizeof(BDRVRawState),
1178 .bdrv_probe_device = hdev_probe_device,
1179 .bdrv_open = hdev_open,
1180 .bdrv_close = raw_close,
1181 .bdrv_create = hdev_create,
1182 .bdrv_flush = raw_flush,
1184 #ifdef CONFIG_AIO
1185 .bdrv_aio_readv = raw_aio_readv,
1186 .bdrv_aio_writev = raw_aio_writev,
1187 #endif
1189 .bdrv_read = raw_read,
1190 .bdrv_write = raw_write,
1191 .bdrv_getlength = raw_getlength,
1193 /* generic scsi device */
1194 #ifdef __linux__
1195 .bdrv_ioctl = hdev_ioctl,
1196 #ifdef CONFIG_AIO
1197 .bdrv_aio_ioctl = hdev_aio_ioctl,
1198 #endif
1199 #endif
1202 #ifdef __linux__
1203 static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
1205 BDRVRawState *s = bs->opaque;
1206 int ret;
1208 posix_aio_init();
1210 s->type = FTYPE_FD;
1212 /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
1213 ret = raw_open_common(bs, filename, flags, O_NONBLOCK);
1214 if (ret)
1215 return ret;
1217 /* close fd so that we can reopen it as needed */
1218 close(s->fd);
1219 s->fd = -1;
1220 s->fd_media_changed = 1;
1222 return 0;
/* Probe score: 100 for names starting with "/dev/fd", 0 otherwise. */
static int floppy_probe_device(const char *filename)
{
    return strstart(filename, "/dev/fd", NULL) ? 100 : 0;
}
1233 static int floppy_is_inserted(BlockDriverState *bs)
1235 return fd_open(bs) >= 0;
1238 static int floppy_media_changed(BlockDriverState *bs)
1240 BDRVRawState *s = bs->opaque;
1241 int ret;
1244 * XXX: we do not have a true media changed indication.
1245 * It does not work if the floppy is changed without trying to read it.
1247 fd_open(bs);
1248 ret = s->fd_media_changed;
1249 s->fd_media_changed = 0;
1250 #ifdef DEBUG_FLOPPY
1251 printf("Floppy changed=%d\n", ret);
1252 #endif
1253 return ret;
1256 static int floppy_eject(BlockDriverState *bs, int eject_flag)
1258 BDRVRawState *s = bs->opaque;
1259 int fd;
1261 if (s->fd >= 0) {
1262 close(s->fd);
1263 s->fd = -1;
1265 fd = open(bs->filename, s->open_flags | O_NONBLOCK);
1266 if (fd >= 0) {
1267 if (ioctl(fd, FDEJECT, 0) < 0)
1268 perror("FDEJECT");
1269 close(fd);
1272 return 0;
1275 static BlockDriver bdrv_host_floppy = {
1276 .format_name = "host_floppy",
1277 .instance_size = sizeof(BDRVRawState),
1278 .bdrv_probe_device = floppy_probe_device,
1279 .bdrv_open = floppy_open,
1280 .bdrv_close = raw_close,
1281 .bdrv_create = hdev_create,
1282 .bdrv_flush = raw_flush,
1284 #ifdef CONFIG_AIO
1285 .bdrv_aio_readv = raw_aio_readv,
1286 .bdrv_aio_writev = raw_aio_writev,
1287 #endif
1289 .bdrv_read = raw_read,
1290 .bdrv_write = raw_write,
1291 .bdrv_getlength = raw_getlength,
1293 /* removable device support */
1294 .bdrv_is_inserted = floppy_is_inserted,
1295 .bdrv_media_changed = floppy_media_changed,
1296 .bdrv_eject = floppy_eject,
1299 static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1301 BDRVRawState *s = bs->opaque;
1303 s->type = FTYPE_CD;
1305 /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
1306 return raw_open_common(bs, filename, flags, O_NONBLOCK);
/* Probe score: 100 for names starting with "/dev/cd", 0 otherwise. */
static int cdrom_probe_device(const char *filename)
{
    return strstart(filename, "/dev/cd", NULL) ? 100 : 0;
}
1316 static int cdrom_is_inserted(BlockDriverState *bs)
1318 BDRVRawState *s = bs->opaque;
1319 int ret;
1321 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1322 if (ret == CDS_DISC_OK)
1323 return 1;
1324 return 0;
1327 static int cdrom_eject(BlockDriverState *bs, int eject_flag)
1329 BDRVRawState *s = bs->opaque;
1331 if (eject_flag) {
1332 if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
1333 perror("CDROMEJECT");
1334 } else {
1335 if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
1336 perror("CDROMEJECT");
1339 return 0;
1342 static int cdrom_set_locked(BlockDriverState *bs, int locked)
1344 BDRVRawState *s = bs->opaque;
1346 if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
1348 * Note: an error can happen if the distribution automatically
1349 * mounts the CD-ROM
1351 /* perror("CDROM_LOCKDOOR"); */
1354 return 0;
1357 static BlockDriver bdrv_host_cdrom = {
1358 .format_name = "host_cdrom",
1359 .instance_size = sizeof(BDRVRawState),
1360 .bdrv_probe_device = cdrom_probe_device,
1361 .bdrv_open = cdrom_open,
1362 .bdrv_close = raw_close,
1363 .bdrv_create = hdev_create,
1364 .bdrv_flush = raw_flush,
1366 #ifdef CONFIG_AIO
1367 .bdrv_aio_readv = raw_aio_readv,
1368 .bdrv_aio_writev = raw_aio_writev,
1369 #endif
1371 .bdrv_read = raw_read,
1372 .bdrv_write = raw_write,
1373 .bdrv_getlength = raw_getlength,
1375 /* removable device support */
1376 .bdrv_is_inserted = cdrom_is_inserted,
1377 .bdrv_eject = cdrom_eject,
1378 .bdrv_set_locked = cdrom_set_locked,
1380 /* generic scsi device */
1381 .bdrv_ioctl = hdev_ioctl,
1382 #ifdef CONFIG_AIO
1383 .bdrv_aio_ioctl = hdev_aio_ioctl,
1384 #endif
1386 #endif /* __linux__ */
1388 #ifdef __FreeBSD__
1389 static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1391 BDRVRawState *s = bs->opaque;
1392 int ret;
1394 s->type = FTYPE_CD;
1396 ret = raw_open_common(bs, filename, flags, 0);
1397 if (ret)
1398 return ret;
1400 /* make sure the door isnt locked at this time */
1401 ioctl(s->fd, CDIOCALLOW);
1402 return 0;
/*
 * Probe score for a host CD-ROM device (FreeBSD): 100 when @filename starts
 * with "/dev/cd" or "/dev/acd", 0 otherwise.
 */
static int cdrom_probe_device(const char *filename)
{
    if (strstart(filename, "/dev/cd", NULL)) {
        return 100;
    }
    if (strstart(filename, "/dev/acd", NULL)) {
        return 100;
    }
    return 0;
}
1413 static int cdrom_reopen(BlockDriverState *bs)
1415 BDRVRawState *s = bs->opaque;
1416 int fd;
1419 * Force reread of possibly changed/newly loaded disc,
1420 * FreeBSD seems to not notice sometimes...
1422 if (s->fd >= 0)
1423 close(s->fd);
1424 fd = open(bs->filename, s->open_flags, 0644);
1425 if (fd < 0) {
1426 s->fd = -1;
1427 return -EIO;
1429 s->fd = fd;
1431 /* make sure the door isnt locked at this time */
1432 ioctl(s->fd, CDIOCALLOW);
1433 return 0;
1436 static int cdrom_is_inserted(BlockDriverState *bs)
1438 return raw_getlength(bs) > 0;
1441 static int cdrom_eject(BlockDriverState *bs, int eject_flag)
1443 BDRVRawState *s = bs->opaque;
1445 if (s->fd < 0)
1446 return -ENOTSUP;
1448 (void) ioctl(s->fd, CDIOCALLOW);
1450 if (eject_flag) {
1451 if (ioctl(s->fd, CDIOCEJECT) < 0)
1452 perror("CDIOCEJECT");
1453 } else {
1454 if (ioctl(s->fd, CDIOCCLOSE) < 0)
1455 perror("CDIOCCLOSE");
1458 if (cdrom_reopen(bs) < 0)
1459 return -ENOTSUP;
1460 return 0;
1463 static int cdrom_set_locked(BlockDriverState *bs, int locked)
1465 BDRVRawState *s = bs->opaque;
1467 if (s->fd < 0)
1468 return -ENOTSUP;
1469 if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
1471 * Note: an error can happen if the distribution automatically
1472 * mounts the CD-ROM
1474 /* perror("CDROM_LOCKDOOR"); */
1477 return 0;
1480 static BlockDriver bdrv_host_cdrom = {
1481 .format_name = "host_cdrom",
1482 .instance_size = sizeof(BDRVRawState),
1483 .bdrv_probe_device = cdrom_probe_device,
1484 .bdrv_open = cdrom_open,
1485 .bdrv_close = raw_close,
1486 .bdrv_create = hdev_create,
1487 .bdrv_flush = raw_flush,
1489 #ifdef CONFIG_AIO
1490 .bdrv_aio_readv = raw_aio_readv,
1491 .bdrv_aio_writev = raw_aio_writev,
1492 #endif
1494 .bdrv_read = raw_read,
1495 .bdrv_write = raw_write,
1496 .bdrv_getlength = raw_getlength,
1498 /* removable device support */
1499 .bdrv_is_inserted = cdrom_is_inserted,
1500 .bdrv_eject = cdrom_eject,
1501 .bdrv_set_locked = cdrom_set_locked,
1503 #endif /* __FreeBSD__ */
1505 static void bdrv_raw_init(void)
1508 * Register all the drivers. Note that order is important, the driver
1509 * registered last will get probed first.
1511 bdrv_register(&bdrv_raw);
1512 bdrv_register(&bdrv_host_device);
1513 #ifdef __linux__
1514 bdrv_register(&bdrv_host_floppy);
1515 bdrv_register(&bdrv_host_cdrom);
1516 #endif
1517 #ifdef __FreeBSD__
1518 bdrv_register(&bdrv_host_cdrom);
1519 #endif
1522 block_init(bdrv_raw_init);