qemu-io: Implement bdrv_load_vmstate/bdrv_save_vmstate
[qemu-kvm/fedora.git] / block / raw-posix.c
blob389903e44bc1d060785fc61b398c527f13d294bf
1 /*
2 * Block driver for RAW files (posix)
4 * Copyright (c) 2006 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
24 #include "qemu-common.h"
25 #include "qemu-timer.h"
26 #include "qemu-char.h"
27 #include "block_int.h"
28 #include "module.h"
29 #ifdef CONFIG_AIO
30 #include "posix-aio-compat.h"
31 #endif
33 #ifdef CONFIG_COCOA
34 #include <paths.h>
35 #include <sys/param.h>
36 #include <IOKit/IOKitLib.h>
37 #include <IOKit/IOBSD.h>
38 #include <IOKit/storage/IOMediaBSDClient.h>
39 #include <IOKit/storage/IOMedia.h>
40 #include <IOKit/storage/IOCDMedia.h>
41 //#include <IOKit/storage/IOCDTypes.h>
42 #include <CoreFoundation/CoreFoundation.h>
43 #endif
45 #ifdef __sun__
46 #define _POSIX_PTHREAD_SEMANTICS 1
47 #include <signal.h>
48 #include <sys/dkio.h>
49 #endif
50 #ifdef __linux__
51 #include <sys/ioctl.h>
52 #include <linux/cdrom.h>
53 #include <linux/fd.h>
54 #endif
55 #ifdef __FreeBSD__
56 #include <signal.h>
57 #include <sys/disk.h>
58 #include <sys/cdio.h>
59 #endif
61 #ifdef __OpenBSD__
62 #include <sys/ioctl.h>
63 #include <sys/disklabel.h>
64 #include <sys/dkio.h>
65 #endif
67 #ifdef __DragonFly__
68 #include <sys/ioctl.h>
69 #include <sys/diskslice.h>
70 #endif
72 //#define DEBUG_FLOPPY
74 //#define DEBUG_BLOCK
75 #if defined(DEBUG_BLOCK)
76 #define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
77 { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
78 #else
79 #define DEBUG_BLOCK_PRINT(formatCstr, ...)
80 #endif
82 /* OS X does not have O_DSYNC */
83 #ifndef O_DSYNC
84 #ifdef O_SYNC
85 #define O_DSYNC O_SYNC
86 #elif defined(O_FSYNC)
87 #define O_DSYNC O_FSYNC
88 #endif
89 #endif
91 /* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
92 #ifndef O_DIRECT
93 #define O_DIRECT O_DSYNC
94 #endif
96 #define FTYPE_FILE 0
97 #define FTYPE_CD 1
98 #define FTYPE_FD 2
100 #define ALIGNED_BUFFER_SIZE (32 * 512)
102 /* if the FD is not accessed during that time (in ms), we try to
103 reopen it to see if the disk has been changed */
104 #define FD_OPEN_TIMEOUT 1000
/* Per-image state shared by the raw file, host device, floppy and CD-ROM
   drivers defined in this file. */
typedef struct BDRVRawState {
    int fd;                     /* open descriptor, or -1 when closed */
    int type;                   /* one of the FTYPE_* constants */
    unsigned int lseek_err_cnt; /* consecutive lseek() failures */
    int open_flags;             /* flags passed to open() */
#if defined(__linux__)
    /* linux floppy specific */
    int64_t fd_open_time;       /* rt_clock value at the last fd_open() success */
    int64_t fd_error_time;      /* rt_clock value at the last failed open */
    int fd_got_error;           /* non-zero after a failed open */
    int fd_media_changed;       /* latched media-change indication */
#endif
    uint8_t *aligned_buf;       /* bounce buffer, non-NULL only for BDRV_O_NOCACHE */
} BDRVRawState;
121 static int posix_aio_init(void);
123 static int fd_open(BlockDriverState *bs);
124 static int64_t raw_getlength(BlockDriverState *bs);
126 #if defined(__FreeBSD__)
127 static int cdrom_reopen(BlockDriverState *bs);
128 #endif
130 static int raw_open_common(BlockDriverState *bs, const char *filename,
131 int bdrv_flags, int open_flags)
133 BDRVRawState *s = bs->opaque;
134 int fd, ret;
136 posix_aio_init();
138 s->lseek_err_cnt = 0;
140 s->open_flags = open_flags | O_BINARY;
141 s->open_flags &= ~O_ACCMODE;
142 if ((bdrv_flags & BDRV_O_ACCESS) == BDRV_O_RDWR) {
143 s->open_flags |= O_RDWR;
144 } else {
145 s->open_flags |= O_RDONLY;
146 bs->read_only = 1;
149 /* Use O_DSYNC for write-through caching, no flags for write-back caching,
150 * and O_DIRECT for no caching. */
151 if ((bdrv_flags & BDRV_O_NOCACHE))
152 s->open_flags |= O_DIRECT;
153 else if (!(bdrv_flags & BDRV_O_CACHE_WB))
154 s->open_flags |= O_DSYNC;
156 s->fd = -1;
157 fd = open(filename, s->open_flags, 0644);
158 if (fd < 0) {
159 ret = -errno;
160 if (ret == -EROFS)
161 ret = -EACCES;
162 return ret;
164 s->fd = fd;
165 s->aligned_buf = NULL;
166 if ((bdrv_flags & BDRV_O_NOCACHE)) {
167 s->aligned_buf = qemu_blockalign(bs, ALIGNED_BUFFER_SIZE);
168 if (s->aligned_buf == NULL) {
169 ret = -errno;
170 close(fd);
171 return ret;
174 return 0;
177 static int raw_open(BlockDriverState *bs, const char *filename, int flags)
179 BDRVRawState *s = bs->opaque;
180 int open_flags = 0;
182 s->type = FTYPE_FILE;
183 if (flags & BDRV_O_CREAT)
184 open_flags = O_CREAT | O_TRUNC;
186 return raw_open_common(bs, filename, flags, open_flags);
/* XXX: use host sector size if necessary with:
#ifdef DIOCGSECTORSIZE
        {
            unsigned int sectorsize = 512;
            if (!ioctl(fd, DIOCGSECTORSIZE, &sectorsize) &&
                sectorsize > bufsize)
                bufsize = sectorsize;
        }
#endif
#ifdef CONFIG_COCOA
        u_int32_t blockSize = 512;
        if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize > bufsize) {
            bufsize = blockSize;
        }
#endif
*/
207 * offset and count are in bytes, but must be multiples of 512 for files
208 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
210 * This function may be called without alignment if the caller ensures
211 * that O_DIRECT is not in effect.
213 static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
214 uint8_t *buf, int count)
216 BDRVRawState *s = bs->opaque;
217 int ret;
219 ret = fd_open(bs);
220 if (ret < 0)
221 return ret;
223 if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
224 ++(s->lseek_err_cnt);
225 if(s->lseek_err_cnt <= 10) {
226 DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
227 "] lseek failed : %d = %s\n",
228 s->fd, bs->filename, offset, buf, count,
229 bs->total_sectors, errno, strerror(errno));
231 return -1;
233 s->lseek_err_cnt=0;
235 ret = read(s->fd, buf, count);
236 if (ret == count)
237 goto label__raw_read__success;
239 /* Allow reads beyond the end (needed for pwrite) */
240 if ((ret == 0) && bs->growable) {
241 int64_t size = raw_getlength(bs);
242 if (offset >= size) {
243 memset(buf, 0, count);
244 ret = count;
245 goto label__raw_read__success;
249 DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
250 "] read failed %d : %d = %s\n",
251 s->fd, bs->filename, offset, buf, count,
252 bs->total_sectors, ret, errno, strerror(errno));
254 /* Try harder for CDrom. */
255 if (bs->type == BDRV_TYPE_CDROM) {
256 lseek(s->fd, offset, SEEK_SET);
257 ret = read(s->fd, buf, count);
258 if (ret == count)
259 goto label__raw_read__success;
260 lseek(s->fd, offset, SEEK_SET);
261 ret = read(s->fd, buf, count);
262 if (ret == count)
263 goto label__raw_read__success;
265 DEBUG_BLOCK_PRINT("raw_pread(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
266 "] retry read failed %d : %d = %s\n",
267 s->fd, bs->filename, offset, buf, count,
268 bs->total_sectors, ret, errno, strerror(errno));
271 label__raw_read__success:
273 return (ret < 0) ? -errno : ret;
277 * offset and count are in bytes, but must be multiples of 512 for files
278 * opened with O_DIRECT. buf must be aligned to 512 bytes then.
280 * This function may be called without alignment if the caller ensures
281 * that O_DIRECT is not in effect.
283 static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
284 const uint8_t *buf, int count)
286 BDRVRawState *s = bs->opaque;
287 int ret;
289 ret = fd_open(bs);
290 if (ret < 0)
291 return -errno;
293 if (offset >= 0 && lseek(s->fd, offset, SEEK_SET) == (off_t)-1) {
294 ++(s->lseek_err_cnt);
295 if(s->lseek_err_cnt) {
296 DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%"
297 PRId64 "] lseek failed : %d = %s\n",
298 s->fd, bs->filename, offset, buf, count,
299 bs->total_sectors, errno, strerror(errno));
301 return -EIO;
303 s->lseek_err_cnt = 0;
305 ret = write(s->fd, buf, count);
306 if (ret == count)
307 goto label__raw_write__success;
309 DEBUG_BLOCK_PRINT("raw_pwrite(%d:%s, %" PRId64 ", %p, %d) [%" PRId64
310 "] write failed %d : %d = %s\n",
311 s->fd, bs->filename, offset, buf, count,
312 bs->total_sectors, ret, errno, strerror(errno));
314 label__raw_write__success:
316 return (ret < 0) ? -errno : ret;
321 * offset and count are in bytes and possibly not aligned. For files opened
322 * with O_DIRECT, necessary alignments are ensured before calling
323 * raw_pread_aligned to do the actual read.
325 static int raw_pread(BlockDriverState *bs, int64_t offset,
326 uint8_t *buf, int count)
328 BDRVRawState *s = bs->opaque;
329 int size, ret, shift, sum;
331 sum = 0;
333 if (s->aligned_buf != NULL) {
335 if (offset & 0x1ff) {
336 /* align offset on a 512 bytes boundary */
338 shift = offset & 0x1ff;
339 size = (shift + count + 0x1ff) & ~0x1ff;
340 if (size > ALIGNED_BUFFER_SIZE)
341 size = ALIGNED_BUFFER_SIZE;
342 ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
343 if (ret < 0)
344 return ret;
346 size = 512 - shift;
347 if (size > count)
348 size = count;
349 memcpy(buf, s->aligned_buf + shift, size);
351 buf += size;
352 offset += size;
353 count -= size;
354 sum += size;
356 if (count == 0)
357 return sum;
359 if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
361 /* read on aligned buffer */
363 while (count) {
365 size = (count + 0x1ff) & ~0x1ff;
366 if (size > ALIGNED_BUFFER_SIZE)
367 size = ALIGNED_BUFFER_SIZE;
369 ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
370 if (ret < 0)
371 return ret;
373 size = ret;
374 if (size > count)
375 size = count;
377 memcpy(buf, s->aligned_buf, size);
379 buf += size;
380 offset += size;
381 count -= size;
382 sum += size;
385 return sum;
389 return raw_pread_aligned(bs, offset, buf, count) + sum;
392 static int raw_read(BlockDriverState *bs, int64_t sector_num,
393 uint8_t *buf, int nb_sectors)
395 int ret;
397 ret = raw_pread(bs, sector_num * 512, buf, nb_sectors * 512);
398 if (ret == (nb_sectors * 512))
399 ret = 0;
400 return ret;
404 * offset and count are in bytes and possibly not aligned. For files opened
405 * with O_DIRECT, necessary alignments are ensured before calling
406 * raw_pwrite_aligned to do the actual write.
408 static int raw_pwrite(BlockDriverState *bs, int64_t offset,
409 const uint8_t *buf, int count)
411 BDRVRawState *s = bs->opaque;
412 int size, ret, shift, sum;
414 sum = 0;
416 if (s->aligned_buf != NULL) {
418 if (offset & 0x1ff) {
419 /* align offset on a 512 bytes boundary */
420 shift = offset & 0x1ff;
421 ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
422 if (ret < 0)
423 return ret;
425 size = 512 - shift;
426 if (size > count)
427 size = count;
428 memcpy(s->aligned_buf + shift, buf, size);
430 ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
431 if (ret < 0)
432 return ret;
434 buf += size;
435 offset += size;
436 count -= size;
437 sum += size;
439 if (count == 0)
440 return sum;
442 if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
444 while ((size = (count & ~0x1ff)) != 0) {
446 if (size > ALIGNED_BUFFER_SIZE)
447 size = ALIGNED_BUFFER_SIZE;
449 memcpy(s->aligned_buf, buf, size);
451 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
452 if (ret < 0)
453 return ret;
455 buf += ret;
456 offset += ret;
457 count -= ret;
458 sum += ret;
460 /* here, count < 512 because (count & ~0x1ff) == 0 */
461 if (count) {
462 ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
463 if (ret < 0)
464 return ret;
465 memcpy(s->aligned_buf, buf, count);
467 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
468 if (ret < 0)
469 return ret;
470 if (count < ret)
471 ret = count;
473 sum += ret;
475 return sum;
478 return raw_pwrite_aligned(bs, offset, buf, count) + sum;
481 static int raw_write(BlockDriverState *bs, int64_t sector_num,
482 const uint8_t *buf, int nb_sectors)
484 int ret;
485 ret = raw_pwrite(bs, sector_num * 512, buf, nb_sectors * 512);
486 if (ret == (nb_sectors * 512))
487 ret = 0;
488 return ret;
491 #ifdef CONFIG_AIO
492 /***********************************************************/
493 /* Unix AIO using POSIX AIO */
495 typedef struct RawAIOCB {
496 BlockDriverAIOCB common;
497 struct qemu_paiocb aiocb;
498 struct RawAIOCB *next;
499 int ret;
500 } RawAIOCB;
502 typedef struct PosixAioState
504 int rfd, wfd;
505 RawAIOCB *first_aio;
506 } PosixAioState;
508 static void posix_aio_read(void *opaque)
510 PosixAioState *s = opaque;
511 RawAIOCB *acb, **pacb;
512 int ret;
513 ssize_t len;
515 /* read all bytes from signal pipe */
516 for (;;) {
517 char bytes[16];
519 len = read(s->rfd, bytes, sizeof(bytes));
520 if (len == -1 && errno == EINTR)
521 continue; /* try again */
522 if (len == sizeof(bytes))
523 continue; /* more to read */
524 break;
527 for(;;) {
528 pacb = &s->first_aio;
529 for(;;) {
530 acb = *pacb;
531 if (!acb)
532 goto the_end;
533 ret = qemu_paio_error(&acb->aiocb);
534 if (ret == ECANCELED) {
535 /* remove the request */
536 *pacb = acb->next;
537 qemu_aio_release(acb);
538 } else if (ret != EINPROGRESS) {
539 /* end of aio */
540 if (ret == 0) {
541 ret = qemu_paio_return(&acb->aiocb);
542 if (ret == acb->aiocb.aio_nbytes)
543 ret = 0;
544 else
545 ret = -EINVAL;
546 } else {
547 ret = -ret;
549 /* remove the request */
550 *pacb = acb->next;
551 /* call the callback */
552 acb->common.cb(acb->common.opaque, ret);
553 qemu_aio_release(acb);
554 break;
555 } else {
556 pacb = &acb->next;
560 the_end: ;
563 static int posix_aio_flush(void *opaque)
565 PosixAioState *s = opaque;
566 return !!s->first_aio;
569 static PosixAioState *posix_aio_state;
571 static void aio_signal_handler(int signum)
573 if (posix_aio_state) {
574 char byte = 0;
576 write(posix_aio_state->wfd, &byte, sizeof(byte));
579 qemu_service_io();
582 static int posix_aio_init(void)
584 struct sigaction act;
585 PosixAioState *s;
586 int fds[2];
587 struct qemu_paioinit ai;
589 if (posix_aio_state)
590 return 0;
592 s = qemu_malloc(sizeof(PosixAioState));
594 sigfillset(&act.sa_mask);
595 act.sa_flags = 0; /* do not restart syscalls to interrupt select() */
596 act.sa_handler = aio_signal_handler;
597 sigaction(SIGUSR2, &act, NULL);
599 s->first_aio = NULL;
600 if (pipe(fds) == -1) {
601 fprintf(stderr, "failed to create pipe\n");
602 return -errno;
605 s->rfd = fds[0];
606 s->wfd = fds[1];
608 fcntl(s->rfd, F_SETFL, O_NONBLOCK);
609 fcntl(s->wfd, F_SETFL, O_NONBLOCK);
611 qemu_aio_set_fd_handler(s->rfd, posix_aio_read, NULL, posix_aio_flush, s);
613 memset(&ai, 0, sizeof(ai));
614 ai.aio_threads = 64;
615 ai.aio_num = 64;
616 qemu_paio_init(&ai);
618 posix_aio_state = s;
620 return 0;
623 static void raw_aio_remove(RawAIOCB *acb)
625 RawAIOCB **pacb;
627 /* remove the callback from the queue */
628 pacb = &posix_aio_state->first_aio;
629 for(;;) {
630 if (*pacb == NULL) {
631 fprintf(stderr, "raw_aio_remove: aio request not found!\n");
632 break;
633 } else if (*pacb == acb) {
634 *pacb = acb->next;
635 qemu_aio_release(acb);
636 break;
638 pacb = &(*pacb)->next;
642 static void raw_aio_cancel(BlockDriverAIOCB *blockacb)
644 int ret;
645 RawAIOCB *acb = (RawAIOCB *)blockacb;
647 ret = qemu_paio_cancel(acb->aiocb.aio_fildes, &acb->aiocb);
648 if (ret == QEMU_PAIO_NOTCANCELED) {
649 /* fail safe: if the aio could not be canceled, we wait for
650 it */
651 while (qemu_paio_error(&acb->aiocb) == EINPROGRESS);
654 raw_aio_remove(acb);
657 static AIOPool raw_aio_pool = {
658 .aiocb_size = sizeof(RawAIOCB),
659 .cancel = raw_aio_cancel,
662 static RawAIOCB *raw_aio_setup(BlockDriverState *bs, int64_t sector_num,
663 QEMUIOVector *qiov, int nb_sectors,
664 BlockDriverCompletionFunc *cb, void *opaque)
666 BDRVRawState *s = bs->opaque;
667 RawAIOCB *acb;
669 if (fd_open(bs) < 0)
670 return NULL;
672 acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
673 if (!acb)
674 return NULL;
675 acb->aiocb.aio_fildes = s->fd;
676 acb->aiocb.ev_signo = SIGUSR2;
677 acb->aiocb.aio_iov = qiov->iov;
678 acb->aiocb.aio_niov = qiov->niov;
679 acb->aiocb.aio_nbytes = nb_sectors * 512;
680 acb->aiocb.aio_offset = sector_num * 512;
681 acb->aiocb.aio_flags = 0;
684 * If O_DIRECT is used the buffer needs to be aligned on a sector
685 * boundary. Tell the low level code to ensure that in case it's
686 * not done yet.
688 if (s->aligned_buf)
689 acb->aiocb.aio_flags |= QEMU_AIO_SECTOR_ALIGNED;
691 acb->next = posix_aio_state->first_aio;
692 posix_aio_state->first_aio = acb;
693 return acb;
696 static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
697 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
698 BlockDriverCompletionFunc *cb, void *opaque)
700 RawAIOCB *acb;
702 acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
703 if (!acb)
704 return NULL;
705 if (qemu_paio_read(&acb->aiocb) < 0) {
706 raw_aio_remove(acb);
707 return NULL;
709 return &acb->common;
712 static BlockDriverAIOCB *raw_aio_writev(BlockDriverState *bs,
713 int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
714 BlockDriverCompletionFunc *cb, void *opaque)
716 RawAIOCB *acb;
718 acb = raw_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque);
719 if (!acb)
720 return NULL;
721 if (qemu_paio_write(&acb->aiocb) < 0) {
722 raw_aio_remove(acb);
723 return NULL;
725 return &acb->common;
727 #else /* CONFIG_AIO */
/* Without CONFIG_AIO there is nothing to set up; always succeeds. */
static int posix_aio_init(void)
{
    return 0;
}
732 #endif /* CONFIG_AIO */
735 static void raw_close(BlockDriverState *bs)
737 BDRVRawState *s = bs->opaque;
738 if (s->fd >= 0) {
739 close(s->fd);
740 s->fd = -1;
741 if (s->aligned_buf != NULL)
742 qemu_free(s->aligned_buf);
746 static int raw_truncate(BlockDriverState *bs, int64_t offset)
748 BDRVRawState *s = bs->opaque;
749 if (s->type != FTYPE_FILE)
750 return -ENOTSUP;
751 if (ftruncate(s->fd, offset) < 0)
752 return -errno;
753 return 0;
756 #ifdef __OpenBSD__
757 static int64_t raw_getlength(BlockDriverState *bs)
759 BDRVRawState *s = bs->opaque;
760 int fd = s->fd;
761 struct stat st;
763 if (fstat(fd, &st))
764 return -1;
765 if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
766 struct disklabel dl;
768 if (ioctl(fd, DIOCGDINFO, &dl))
769 return -1;
770 return (uint64_t)dl.d_secsize *
771 dl.d_partitions[DISKPART(st.st_rdev)].p_size;
772 } else
773 return st.st_size;
775 #else /* !__OpenBSD__ */
776 static int64_t raw_getlength(BlockDriverState *bs)
778 BDRVRawState *s = bs->opaque;
779 int fd = s->fd;
780 int64_t size;
781 #ifdef HOST_BSD
782 struct stat sb;
783 #ifdef __FreeBSD__
784 int reopened = 0;
785 #endif
786 #endif
787 #ifdef __sun__
788 struct dk_minfo minfo;
789 int rv;
790 #endif
791 int ret;
793 ret = fd_open(bs);
794 if (ret < 0)
795 return ret;
797 #ifdef HOST_BSD
798 #ifdef __FreeBSD__
799 again:
800 #endif
801 if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
802 #ifdef DIOCGMEDIASIZE
803 if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
804 #elif defined(DIOCGPART)
806 struct partinfo pi;
807 if (ioctl(fd, DIOCGPART, &pi) == 0)
808 size = pi.media_size;
809 else
810 size = 0;
812 if (size == 0)
813 #endif
814 #ifdef CONFIG_COCOA
815 size = LONG_LONG_MAX;
816 #else
817 size = lseek(fd, 0LL, SEEK_END);
818 #endif
819 #ifdef __FreeBSD__
820 switch(s->type) {
821 case FTYPE_CD:
822 /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
823 if (size == 2048LL * (unsigned)-1)
824 size = 0;
825 /* XXX no disc? maybe we need to reopen... */
826 if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
827 reopened = 1;
828 goto again;
831 #endif
832 } else
833 #endif
834 #ifdef __sun__
836 * use the DKIOCGMEDIAINFO ioctl to read the size.
838 rv = ioctl ( fd, DKIOCGMEDIAINFO, &minfo );
839 if ( rv != -1 ) {
840 size = minfo.dki_lbsize * minfo.dki_capacity;
841 } else /* there are reports that lseek on some devices
842 fails, but irc discussion said that contingency
843 on contingency was overkill */
844 #endif
846 size = lseek(fd, 0, SEEK_END);
848 return size;
850 #endif
852 static int raw_create(const char *filename, QEMUOptionParameter *options)
854 int fd;
855 int result = 0;
856 int64_t total_size = 0;
858 /* Read out options */
859 while (options && options->name) {
860 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
861 total_size = options->value.n / 512;
863 options++;
866 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
867 0644);
868 if (fd < 0) {
869 result = -errno;
870 } else {
871 if (ftruncate(fd, total_size * 512) != 0) {
872 result = -errno;
874 if (close(fd) != 0) {
875 result = -errno;
878 return result;
881 static void raw_flush(BlockDriverState *bs)
883 BDRVRawState *s = bs->opaque;
884 fsync(s->fd);
888 static QEMUOptionParameter raw_create_options[] = {
890 .name = BLOCK_OPT_SIZE,
891 .type = OPT_SIZE,
892 .help = "Virtual disk size"
894 { NULL }
897 static BlockDriver bdrv_raw = {
898 .format_name = "raw",
899 .instance_size = sizeof(BDRVRawState),
900 .bdrv_probe = NULL, /* no probe for protocols */
901 .bdrv_open = raw_open,
902 .bdrv_read = raw_read,
903 .bdrv_write = raw_write,
904 .bdrv_close = raw_close,
905 .bdrv_create = raw_create,
906 .bdrv_flush = raw_flush,
908 #ifdef CONFIG_AIO
909 .bdrv_aio_readv = raw_aio_readv,
910 .bdrv_aio_writev = raw_aio_writev,
911 #endif
913 .bdrv_truncate = raw_truncate,
914 .bdrv_getlength = raw_getlength,
916 .create_options = raw_create_options,
919 /***********************************************/
920 /* host device */
922 #ifdef CONFIG_COCOA
923 static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
924 static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
926 kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
928 kern_return_t kernResult;
929 mach_port_t masterPort;
930 CFMutableDictionaryRef classesToMatch;
932 kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
933 if ( KERN_SUCCESS != kernResult ) {
934 printf( "IOMasterPort returned %d\n", kernResult );
937 classesToMatch = IOServiceMatching( kIOCDMediaClass );
938 if ( classesToMatch == NULL ) {
939 printf( "IOServiceMatching returned a NULL dictionary.\n" );
940 } else {
941 CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
943 kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
944 if ( KERN_SUCCESS != kernResult )
946 printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
949 return kernResult;
952 kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
954 io_object_t nextMedia;
955 kern_return_t kernResult = KERN_FAILURE;
956 *bsdPath = '\0';
957 nextMedia = IOIteratorNext( mediaIterator );
958 if ( nextMedia )
960 CFTypeRef bsdPathAsCFString;
961 bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
962 if ( bsdPathAsCFString ) {
963 size_t devPathLength;
964 strcpy( bsdPath, _PATH_DEV );
965 strcat( bsdPath, "r" );
966 devPathLength = strlen( bsdPath );
967 if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
968 kernResult = KERN_SUCCESS;
970 CFRelease( bsdPathAsCFString );
972 IOObjectRelease( nextMedia );
975 return kernResult;
978 #endif
/* Probe score for host devices: 100 for character/block devices, 50 for
   /dev/cdrom* names, 0 otherwise. */
static int hdev_probe_device(const char *filename)
{
    struct stat info;

    /* allow a dedicated CD-ROM driver to match with a higher priority */
    if (strstart(filename, "/dev/cdrom", NULL)) {
        return 50;
    }

    if (stat(filename, &info) < 0) {
        return 0;
    }
    return (S_ISCHR(info.st_mode) || S_ISBLK(info.st_mode)) ? 100 : 0;
}
996 static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
998 BDRVRawState *s = bs->opaque;
1000 #ifdef CONFIG_COCOA
1001 if (strstart(filename, "/dev/cdrom", NULL)) {
1002 kern_return_t kernResult;
1003 io_iterator_t mediaIterator;
1004 char bsdPath[ MAXPATHLEN ];
1005 int fd;
1007 kernResult = FindEjectableCDMedia( &mediaIterator );
1008 kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
1010 if ( bsdPath[ 0 ] != '\0' ) {
1011 strcat(bsdPath,"s0");
1012 /* some CDs don't have a partition 0 */
1013 fd = open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
1014 if (fd < 0) {
1015 bsdPath[strlen(bsdPath)-1] = '1';
1016 } else {
1017 close(fd);
1019 filename = bsdPath;
1022 if ( mediaIterator )
1023 IOObjectRelease( mediaIterator );
1025 #endif
1027 s->type = FTYPE_FILE;
1028 #if defined(__linux__) && defined(CONFIG_AIO)
1029 if (strstart(filename, "/dev/sg", NULL)) {
1030 bs->sg = 1;
1032 #endif
1034 return raw_open_common(bs, filename, flags, 0);
1037 #if defined(__linux__)
1038 /* Note: we do not have a reliable method to detect if the floppy is
1039 present. The current method is to try to open the floppy at every
1040 I/O and to keep it opened during a few hundreds of ms. */
1041 static int fd_open(BlockDriverState *bs)
1043 BDRVRawState *s = bs->opaque;
1044 int last_media_present;
1046 if (s->type != FTYPE_FD)
1047 return 0;
1048 last_media_present = (s->fd >= 0);
1049 if (s->fd >= 0 &&
1050 (qemu_get_clock(rt_clock) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
1051 close(s->fd);
1052 s->fd = -1;
1053 #ifdef DEBUG_FLOPPY
1054 printf("Floppy closed\n");
1055 #endif
1057 if (s->fd < 0) {
1058 if (s->fd_got_error &&
1059 (qemu_get_clock(rt_clock) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
1060 #ifdef DEBUG_FLOPPY
1061 printf("No floppy (open delayed)\n");
1062 #endif
1063 return -EIO;
1065 s->fd = open(bs->filename, s->open_flags & ~O_NONBLOCK);
1066 if (s->fd < 0) {
1067 s->fd_error_time = qemu_get_clock(rt_clock);
1068 s->fd_got_error = 1;
1069 if (last_media_present)
1070 s->fd_media_changed = 1;
1071 #ifdef DEBUG_FLOPPY
1072 printf("No floppy\n");
1073 #endif
1074 return -EIO;
1076 #ifdef DEBUG_FLOPPY
1077 printf("Floppy opened\n");
1078 #endif
1080 if (!last_media_present)
1081 s->fd_media_changed = 1;
1082 s->fd_open_time = qemu_get_clock(rt_clock);
1083 s->fd_got_error = 0;
1084 return 0;
1087 static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
1089 BDRVRawState *s = bs->opaque;
1091 return ioctl(s->fd, req, buf);
1094 #ifdef CONFIG_AIO
1095 static BlockDriverAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
1096 unsigned long int req, void *buf,
1097 BlockDriverCompletionFunc *cb, void *opaque)
1099 BDRVRawState *s = bs->opaque;
1100 RawAIOCB *acb;
1102 if (fd_open(bs) < 0)
1103 return NULL;
1105 acb = qemu_aio_get(&raw_aio_pool, bs, cb, opaque);
1106 if (!acb)
1107 return NULL;
1108 acb->aiocb.aio_fildes = s->fd;
1109 acb->aiocb.ev_signo = SIGUSR2;
1110 acb->aiocb.aio_offset = 0;
1111 acb->aiocb.aio_flags = 0;
1113 acb->next = posix_aio_state->first_aio;
1114 posix_aio_state->first_aio = acb;
1116 acb->aiocb.aio_ioctl_buf = buf;
1117 acb->aiocb.aio_ioctl_cmd = req;
1118 if (qemu_paio_ioctl(&acb->aiocb) < 0) {
1119 raw_aio_remove(acb);
1120 return NULL;
1123 return &acb->common;
1125 #endif
1127 #elif defined(__FreeBSD__)
1128 static int fd_open(BlockDriverState *bs)
1130 BDRVRawState *s = bs->opaque;
1132 /* this is just to ensure s->fd is sane (its called by io ops) */
1133 if (s->fd >= 0)
1134 return 0;
1135 return -EIO;
1137 #else /* !linux && !FreeBSD */
1139 static int fd_open(BlockDriverState *bs)
1141 return 0;
1144 #endif /* !linux && !FreeBSD */
1146 static int hdev_create(const char *filename, QEMUOptionParameter *options)
1148 int fd;
1149 int ret = 0;
1150 struct stat stat_buf;
1151 int64_t total_size = 0;
1153 /* Read out options */
1154 while (options && options->name) {
1155 if (!strcmp(options->name, "size")) {
1156 total_size = options->value.n / 512;
1158 options++;
1161 fd = open(filename, O_WRONLY | O_BINARY);
1162 if (fd < 0)
1163 return -EIO;
1165 if (fstat(fd, &stat_buf) < 0)
1166 ret = -EIO;
1167 else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode))
1168 ret = -EIO;
1169 else if (lseek(fd, 0, SEEK_END) < total_size * 512)
1170 ret = -ENOSPC;
1172 close(fd);
1173 return ret;
1176 static BlockDriver bdrv_host_device = {
1177 .format_name = "host_device",
1178 .instance_size = sizeof(BDRVRawState),
1179 .bdrv_probe_device = hdev_probe_device,
1180 .bdrv_open = hdev_open,
1181 .bdrv_close = raw_close,
1182 .bdrv_create = hdev_create,
1183 .bdrv_flush = raw_flush,
1185 #ifdef CONFIG_AIO
1186 .bdrv_aio_readv = raw_aio_readv,
1187 .bdrv_aio_writev = raw_aio_writev,
1188 #endif
1190 .bdrv_read = raw_read,
1191 .bdrv_write = raw_write,
1192 .bdrv_getlength = raw_getlength,
1194 /* generic scsi device */
1195 #ifdef __linux__
1196 .bdrv_ioctl = hdev_ioctl,
1197 #ifdef CONFIG_AIO
1198 .bdrv_aio_ioctl = hdev_aio_ioctl,
1199 #endif
1200 #endif
1203 #ifdef __linux__
1204 static int floppy_open(BlockDriverState *bs, const char *filename, int flags)
1206 BDRVRawState *s = bs->opaque;
1207 int ret;
1209 posix_aio_init();
1211 s->type = FTYPE_FD;
1213 /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
1214 ret = raw_open_common(bs, filename, flags, O_NONBLOCK);
1215 if (ret)
1216 return ret;
1218 /* close fd so that we can reopen it as needed */
1219 close(s->fd);
1220 s->fd = -1;
1221 s->fd_media_changed = 1;
1223 return 0;
/* Probe score: 100 for names starting with "/dev/fd", otherwise 0. */
static int floppy_probe_device(const char *filename)
{
    return strstart(filename, "/dev/fd", NULL) ? 100 : 0;
}
1234 static int floppy_is_inserted(BlockDriverState *bs)
1236 return fd_open(bs) >= 0;
1239 static int floppy_media_changed(BlockDriverState *bs)
1241 BDRVRawState *s = bs->opaque;
1242 int ret;
1245 * XXX: we do not have a true media changed indication.
1246 * It does not work if the floppy is changed without trying to read it.
1248 fd_open(bs);
1249 ret = s->fd_media_changed;
1250 s->fd_media_changed = 0;
1251 #ifdef DEBUG_FLOPPY
1252 printf("Floppy changed=%d\n", ret);
1253 #endif
1254 return ret;
1257 static int floppy_eject(BlockDriverState *bs, int eject_flag)
1259 BDRVRawState *s = bs->opaque;
1260 int fd;
1262 if (s->fd >= 0) {
1263 close(s->fd);
1264 s->fd = -1;
1266 fd = open(bs->filename, s->open_flags | O_NONBLOCK);
1267 if (fd >= 0) {
1268 if (ioctl(fd, FDEJECT, 0) < 0)
1269 perror("FDEJECT");
1270 close(fd);
1273 return 0;
1276 static BlockDriver bdrv_host_floppy = {
1277 .format_name = "host_floppy",
1278 .instance_size = sizeof(BDRVRawState),
1279 .bdrv_probe_device = floppy_probe_device,
1280 .bdrv_open = floppy_open,
1281 .bdrv_close = raw_close,
1282 .bdrv_create = hdev_create,
1283 .bdrv_flush = raw_flush,
1285 #ifdef CONFIG_AIO
1286 .bdrv_aio_readv = raw_aio_readv,
1287 .bdrv_aio_writev = raw_aio_writev,
1288 #endif
1290 .bdrv_read = raw_read,
1291 .bdrv_write = raw_write,
1292 .bdrv_getlength = raw_getlength,
1294 /* removable device support */
1295 .bdrv_is_inserted = floppy_is_inserted,
1296 .bdrv_media_changed = floppy_media_changed,
1297 .bdrv_eject = floppy_eject,
1300 static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1302 BDRVRawState *s = bs->opaque;
1304 s->type = FTYPE_CD;
1306 /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
1307 return raw_open_common(bs, filename, flags, O_NONBLOCK);
/* Probe score: 100 for names starting with "/dev/cd", otherwise 0. */
static int cdrom_probe_device(const char *filename)
{
    return strstart(filename, "/dev/cd", NULL) ? 100 : 0;
}
1317 static int cdrom_is_inserted(BlockDriverState *bs)
1319 BDRVRawState *s = bs->opaque;
1320 int ret;
1322 ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
1323 if (ret == CDS_DISC_OK)
1324 return 1;
1325 return 0;
1328 static int cdrom_eject(BlockDriverState *bs, int eject_flag)
1330 BDRVRawState *s = bs->opaque;
1332 if (eject_flag) {
1333 if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
1334 perror("CDROMEJECT");
1335 } else {
1336 if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
1337 perror("CDROMEJECT");
1340 return 0;
1343 static int cdrom_set_locked(BlockDriverState *bs, int locked)
1345 BDRVRawState *s = bs->opaque;
1347 if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
1349 * Note: an error can happen if the distribution automatically
1350 * mounts the CD-ROM
1352 /* perror("CDROM_LOCKDOOR"); */
1355 return 0;
1358 static BlockDriver bdrv_host_cdrom = {
1359 .format_name = "host_cdrom",
1360 .instance_size = sizeof(BDRVRawState),
1361 .bdrv_probe_device = cdrom_probe_device,
1362 .bdrv_open = cdrom_open,
1363 .bdrv_close = raw_close,
1364 .bdrv_create = hdev_create,
1365 .bdrv_flush = raw_flush,
1367 #ifdef CONFIG_AIO
1368 .bdrv_aio_readv = raw_aio_readv,
1369 .bdrv_aio_writev = raw_aio_writev,
1370 #endif
1372 .bdrv_read = raw_read,
1373 .bdrv_write = raw_write,
1374 .bdrv_getlength = raw_getlength,
1376 /* removable device support */
1377 .bdrv_is_inserted = cdrom_is_inserted,
1378 .bdrv_eject = cdrom_eject,
1379 .bdrv_set_locked = cdrom_set_locked,
1381 /* generic scsi device */
1382 .bdrv_ioctl = hdev_ioctl,
1383 #ifdef CONFIG_AIO
1384 .bdrv_aio_ioctl = hdev_aio_ioctl,
1385 #endif
1387 #endif /* __linux__ */
1389 #ifdef __FreeBSD__
1390 static int cdrom_open(BlockDriverState *bs, const char *filename, int flags)
1392 BDRVRawState *s = bs->opaque;
1393 int ret;
1395 s->type = FTYPE_CD;
1397 ret = raw_open_common(bs, filename, flags, 0);
1398 if (ret)
1399 return ret;
1401 /* make sure the door isnt locked at this time */
1402 ioctl(s->fd, CDIOCALLOW);
1403 return 0;
/*
 * Probe score for the FreeBSD host CD-ROM driver: 100 when filename
 * starts with "/dev/cd" or "/dev/acd", 0 otherwise.
 */
static int cdrom_probe_device(const char *filename)
{
    if (strstart(filename, "/dev/cd", NULL)) {
        return 100;
    }
    if (strstart(filename, "/dev/acd", NULL)) {
        return 100;
    }
    return 0;
}
1414 static int cdrom_reopen(BlockDriverState *bs)
1416 BDRVRawState *s = bs->opaque;
1417 int fd;
1420 * Force reread of possibly changed/newly loaded disc,
1421 * FreeBSD seems to not notice sometimes...
1423 if (s->fd >= 0)
1424 close(s->fd);
1425 fd = open(bs->filename, s->open_flags, 0644);
1426 if (fd < 0) {
1427 s->fd = -1;
1428 return -EIO;
1430 s->fd = fd;
1432 /* make sure the door isnt locked at this time */
1433 ioctl(s->fd, CDIOCALLOW);
1434 return 0;
1437 static int cdrom_is_inserted(BlockDriverState *bs)
1439 return raw_getlength(bs) > 0;
1442 static int cdrom_eject(BlockDriverState *bs, int eject_flag)
1444 BDRVRawState *s = bs->opaque;
1446 if (s->fd < 0)
1447 return -ENOTSUP;
1449 (void) ioctl(s->fd, CDIOCALLOW);
1451 if (eject_flag) {
1452 if (ioctl(s->fd, CDIOCEJECT) < 0)
1453 perror("CDIOCEJECT");
1454 } else {
1455 if (ioctl(s->fd, CDIOCCLOSE) < 0)
1456 perror("CDIOCCLOSE");
1459 if (cdrom_reopen(bs) < 0)
1460 return -ENOTSUP;
1461 return 0;
1464 static int cdrom_set_locked(BlockDriverState *bs, int locked)
1466 BDRVRawState *s = bs->opaque;
1468 if (s->fd < 0)
1469 return -ENOTSUP;
1470 if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
1472 * Note: an error can happen if the distribution automatically
1473 * mounts the CD-ROM
1475 /* perror("CDROM_LOCKDOOR"); */
1478 return 0;
1481 static BlockDriver bdrv_host_cdrom = {
1482 .format_name = "host_cdrom",
1483 .instance_size = sizeof(BDRVRawState),
1484 .bdrv_probe_device = cdrom_probe_device,
1485 .bdrv_open = cdrom_open,
1486 .bdrv_close = raw_close,
1487 .bdrv_create = hdev_create,
1488 .bdrv_flush = raw_flush,
1490 #ifdef CONFIG_AIO
1491 .bdrv_aio_readv = raw_aio_readv,
1492 .bdrv_aio_writev = raw_aio_writev,
1493 #endif
1495 .bdrv_read = raw_read,
1496 .bdrv_write = raw_write,
1497 .bdrv_getlength = raw_getlength,
1499 /* removable device support */
1500 .bdrv_is_inserted = cdrom_is_inserted,
1501 .bdrv_eject = cdrom_eject,
1502 .bdrv_set_locked = cdrom_set_locked,
1504 #endif /* __FreeBSD__ */
1506 static void bdrv_raw_init(void)
1509 * Register all the drivers. Note that order is important, the driver
1510 * registered last will get probed first.
1512 bdrv_register(&bdrv_raw);
1513 bdrv_register(&bdrv_host_device);
1514 #ifdef __linux__
1515 bdrv_register(&bdrv_host_floppy);
1516 bdrv_register(&bdrv_host_cdrom);
1517 #endif
1518 #ifdef __FreeBSD__
1519 bdrv_register(&bdrv_host_cdrom);
1520 #endif
1523 block_init(bdrv_raw_init);