/*
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */
#include <qemu-common.h>
#include <sysemu.h>
#include "virtio-blk.h"
#include "block_int.h"
#ifdef __linux__
# include <scsi/sg.h>
#endif
22 typedef struct VirtIOBlock
28 char serial_str
[BLOCK_SERIAL_STRLEN
+ 1];
31 static VirtIOBlock
*to_virtio_blk(VirtIODevice
*vdev
)
33 return (VirtIOBlock
*)vdev
;
/* store identify data in little endian format
 *
 * Writes the low 16 bits of v to *p, least significant byte first,
 * independent of host byte order.
 *
 * NOTE(review): the function body was lost in the mangled source; it is
 * rebuilt here as explicit byte stores, which implement what the comment
 * above describes.  Confirm against the original.
 */
static inline void put_le16(uint16_t *p, unsigned int v)
{
    unsigned char *bytes = (unsigned char *)p;

    bytes[0] = (unsigned char)(v & 0xff);
    bytes[1] = (unsigned char)((v >> 8) & 0xff);
}
/* copy to *dst from *src, nul pad dst tail as needed to len bytes
 *
 * Exactly len bytes of dst are written.  Once the terminating NUL of src is
 * reached, the remainder of dst is filled with '\0'.  If src is len bytes or
 * longer, dst is NOT NUL-terminated.  A len of zero or less writes nothing.
 */
static inline void padstr(char *dst, const char *src, int len)
{
    while (len-- > 0) {
        *dst++ = *src ? *src++ : '\0';
    }
}
51 /* setup simulated identify data as appropriate for virtio block device
53 * ref: AT Attachment 8 - ATA/ATAPI Command Set (ATA8-ACS)
55 static inline void virtio_identify_template(struct virtio_blk_config
*bc
)
57 uint16_t *p
= &bc
->identify
[0];
58 uint64_t lba_sectors
= bc
->capacity
;
60 memset(p
, 0, sizeof(bc
->identify
));
61 put_le16(p
+ 0, 0x0); /* ATA device */
62 padstr((char *)(p
+ 23), QEMU_VERSION
, 8); /* firmware revision */
63 padstr((char *)(p
+ 27), "QEMU VIRT_BLK", 40); /* model# */
64 put_le16(p
+ 47, 0x80ff); /* max xfer 255 sectors */
65 put_le16(p
+ 49, 0x0b00); /* support IORDY/LBA/DMA */
66 put_le16(p
+ 59, 0x1ff); /* cur xfer 255 sectors */
67 put_le16(p
+ 80, 0x1f0); /* support ATA8/7/6/5/4 */
68 put_le16(p
+ 81, 0x16);
69 put_le16(p
+ 82, 0x400);
70 put_le16(p
+ 83, 0x400);
71 put_le16(p
+ 100, lba_sectors
);
72 put_le16(p
+ 101, lba_sectors
>> 16);
73 put_le16(p
+ 102, lba_sectors
>> 32);
74 put_le16(p
+ 103, lba_sectors
>> 48);
77 typedef struct VirtIOBlockReq
80 VirtQueueElement elem
;
81 struct virtio_blk_inhdr
*in
;
82 struct virtio_blk_outhdr
*out
;
83 struct virtio_scsi_inhdr
*scsi
;
85 struct VirtIOBlockReq
*next
;
88 static void virtio_blk_req_complete(VirtIOBlockReq
*req
, int status
)
90 VirtIOBlock
*s
= req
->dev
;
92 req
->in
->status
= status
;
93 virtqueue_push(s
->vq
, &req
->elem
, req
->qiov
.size
+ sizeof(*req
->in
));
94 virtio_notify(&s
->vdev
, s
->vq
);
99 static int virtio_blk_handle_write_error(VirtIOBlockReq
*req
, int error
)
101 BlockInterfaceErrorAction action
= drive_get_onerror(req
->dev
->bs
);
102 VirtIOBlock
*s
= req
->dev
;
104 if (action
== BLOCK_ERR_IGNORE
)
107 if ((error
== ENOSPC
&& action
== BLOCK_ERR_STOP_ENOSPC
)
108 || action
== BLOCK_ERR_STOP_ANY
) {
113 virtio_blk_req_complete(req
, VIRTIO_BLK_S_IOERR
);
119 static void virtio_blk_rw_complete(void *opaque
, int ret
)
121 VirtIOBlockReq
*req
= opaque
;
123 if (ret
&& (req
->out
->type
& VIRTIO_BLK_T_OUT
)) {
124 if (virtio_blk_handle_write_error(req
, -ret
))
128 virtio_blk_req_complete(req
, VIRTIO_BLK_S_OK
);
131 static VirtIOBlockReq
*virtio_blk_alloc_request(VirtIOBlock
*s
)
133 VirtIOBlockReq
*req
= qemu_mallocz(sizeof(*req
));
138 static VirtIOBlockReq
*virtio_blk_get_request(VirtIOBlock
*s
)
140 VirtIOBlockReq
*req
= virtio_blk_alloc_request(s
);
143 if (!virtqueue_pop(s
->vq
, &req
->elem
)) {
153 static void virtio_blk_handle_scsi(VirtIOBlockReq
*req
)
155 struct sg_io_hdr hdr
;
161 * We require at least one output segment each for the virtio_blk_outhdr
162 * and the SCSI command block.
164 * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
165 * and the sense buffer pointer in the input segments.
167 if (req
->elem
.out_num
< 2 || req
->elem
.in_num
< 3) {
168 virtio_blk_req_complete(req
, VIRTIO_BLK_S_IOERR
);
173 * No support for bidirection commands yet.
175 if (req
->elem
.out_num
> 2 && req
->elem
.in_num
> 3) {
176 virtio_blk_req_complete(req
, VIRTIO_BLK_S_UNSUPP
);
181 * The scsi inhdr is placed in the second-to-last input segment, just
182 * before the regular inhdr.
184 req
->scsi
= (void *)req
->elem
.in_sg
[req
->elem
.in_num
- 2].iov_base
;
185 size
= sizeof(*req
->in
) + sizeof(*req
->scsi
);
187 memset(&hdr
, 0, sizeof(struct sg_io_hdr
));
188 hdr
.interface_id
= 'S';
189 hdr
.cmd_len
= req
->elem
.out_sg
[1].iov_len
;
190 hdr
.cmdp
= req
->elem
.out_sg
[1].iov_base
;
193 if (req
->elem
.out_num
> 2) {
195 * If there are more than the minimally required 2 output segments
196 * there is write payload starting from the third iovec.
198 hdr
.dxfer_direction
= SG_DXFER_TO_DEV
;
199 hdr
.iovec_count
= req
->elem
.out_num
- 2;
201 for (i
= 0; i
< hdr
.iovec_count
; i
++)
202 hdr
.dxfer_len
+= req
->elem
.out_sg
[i
+ 2].iov_len
;
204 hdr
.dxferp
= req
->elem
.out_sg
+ 2;
206 } else if (req
->elem
.in_num
> 3) {
208 * If we have more than 3 input segments the guest wants to actually
211 hdr
.dxfer_direction
= SG_DXFER_FROM_DEV
;
212 hdr
.iovec_count
= req
->elem
.in_num
- 3;
213 for (i
= 0; i
< hdr
.iovec_count
; i
++)
214 hdr
.dxfer_len
+= req
->elem
.in_sg
[i
].iov_len
;
216 hdr
.dxferp
= req
->elem
.in_sg
;
217 size
+= hdr
.dxfer_len
;
220 * Some SCSI commands don't actually transfer any data.
222 hdr
.dxfer_direction
= SG_DXFER_NONE
;
225 hdr
.sbp
= req
->elem
.in_sg
[req
->elem
.in_num
- 3].iov_base
;
226 hdr
.mx_sb_len
= req
->elem
.in_sg
[req
->elem
.in_num
- 3].iov_len
;
227 size
+= hdr
.mx_sb_len
;
229 ret
= bdrv_ioctl(req
->dev
->bs
, SG_IO
, &hdr
);
231 status
= VIRTIO_BLK_S_UNSUPP
;
233 hdr
.resid
= hdr
.dxfer_len
;
234 } else if (hdr
.status
) {
235 status
= VIRTIO_BLK_S_IOERR
;
237 status
= VIRTIO_BLK_S_OK
;
240 req
->scsi
->errors
= hdr
.status
;
241 req
->scsi
->residual
= hdr
.resid
;
242 req
->scsi
->sense_len
= hdr
.sb_len_wr
;
243 req
->scsi
->data_len
= hdr
.dxfer_len
;
245 virtio_blk_req_complete(req
, status
);
248 static void virtio_blk_handle_scsi(VirtIOBlockReq
*req
)
250 virtio_blk_req_complete(req
, VIRTIO_BLK_S_UNSUPP
);
252 #endif /* __linux__ */
254 static void virtio_blk_handle_write(VirtIOBlockReq
*req
)
256 bdrv_aio_writev(req
->dev
->bs
, req
->out
->sector
, &req
->qiov
,
257 req
->qiov
.size
/ 512, virtio_blk_rw_complete
, req
);
260 static void virtio_blk_handle_read(VirtIOBlockReq
*req
)
262 bdrv_aio_readv(req
->dev
->bs
, req
->out
->sector
, &req
->qiov
,
263 req
->qiov
.size
/ 512, virtio_blk_rw_complete
, req
);
266 static void virtio_blk_handle_output(VirtIODevice
*vdev
, VirtQueue
*vq
)
268 VirtIOBlock
*s
= to_virtio_blk(vdev
);
271 while ((req
= virtio_blk_get_request(s
))) {
272 if (req
->elem
.out_num
< 1 || req
->elem
.in_num
< 1) {
273 fprintf(stderr
, "virtio-blk missing headers\n");
277 if (req
->elem
.out_sg
[0].iov_len
< sizeof(*req
->out
) ||
278 req
->elem
.in_sg
[req
->elem
.in_num
- 1].iov_len
< sizeof(*req
->in
)) {
279 fprintf(stderr
, "virtio-blk header not in correct element\n");
283 req
->out
= (void *)req
->elem
.out_sg
[0].iov_base
;
284 req
->in
= (void *)req
->elem
.in_sg
[req
->elem
.in_num
- 1].iov_base
;
286 if (req
->out
->type
& VIRTIO_BLK_T_SCSI_CMD
) {
287 virtio_blk_handle_scsi(req
);
288 } else if (req
->out
->type
& VIRTIO_BLK_T_OUT
) {
289 qemu_iovec_init_external(&req
->qiov
, &req
->elem
.out_sg
[1],
290 req
->elem
.out_num
- 1);
291 virtio_blk_handle_write(req
);
293 qemu_iovec_init_external(&req
->qiov
, &req
->elem
.in_sg
[0],
294 req
->elem
.in_num
- 1);
295 virtio_blk_handle_read(req
);
299 * FIXME: Want to check for completions before returning to guest mode,
300 * so cached reads and writes are reported as quickly as possible. But
301 * that should be done in the generic block layer.
305 static void virtio_blk_dma_restart_cb(void *opaque
, int running
, int reason
)
307 VirtIOBlock
*s
= opaque
;
308 VirtIOBlockReq
*req
= s
->rq
;
316 virtio_blk_handle_write(req
);
321 static void virtio_blk_reset(VirtIODevice
*vdev
)
324 * This should cancel pending requests, but can't do nicely until there
325 * are per-device request lists.
330 /* coalesce internal state, copy to pci i/o region 0
332 static void virtio_blk_update_config(VirtIODevice
*vdev
, uint8_t *config
)
334 VirtIOBlock
*s
= to_virtio_blk(vdev
);
335 struct virtio_blk_config blkcfg
;
337 int cylinders
, heads
, secs
;
339 bdrv_get_geometry(s
->bs
, &capacity
);
340 bdrv_get_geometry_hint(s
->bs
, &cylinders
, &heads
, &secs
);
341 memset(&blkcfg
, 0, sizeof(blkcfg
));
342 stq_raw(&blkcfg
.capacity
, capacity
);
343 stl_raw(&blkcfg
.seg_max
, 128 - 2);
344 stw_raw(&blkcfg
.cylinders
, cylinders
);
345 blkcfg
.heads
= heads
;
346 blkcfg
.sectors
= secs
;
348 virtio_identify_template(&blkcfg
);
349 memcpy(&blkcfg
.identify
[VIRTIO_BLK_ID_SN
], s
->serial_str
,
350 VIRTIO_BLK_ID_SN_BYTES
);
351 memcpy(config
, &blkcfg
, sizeof(blkcfg
));
354 static uint32_t virtio_blk_get_features(VirtIODevice
*vdev
)
356 VirtIOBlock
*s
= to_virtio_blk(vdev
);
357 uint32_t features
= 0;
359 features
|= (1 << VIRTIO_BLK_F_SEG_MAX
);
360 features
|= (1 << VIRTIO_BLK_F_GEOMETRY
);
362 features
|= (1 << VIRTIO_BLK_F_SCSI
);
364 if (strcmp(s
->serial_str
, "0"))
365 features
|= 1 << VIRTIO_BLK_F_IDENTIFY
;
370 static void virtio_blk_save(QEMUFile
*f
, void *opaque
)
372 VirtIOBlock
*s
= opaque
;
373 VirtIOBlockReq
*req
= s
->rq
;
375 virtio_save(&s
->vdev
, f
);
378 qemu_put_sbyte(f
, 1);
379 qemu_put_buffer(f
, (unsigned char*)&req
->elem
, sizeof(req
->elem
));
382 qemu_put_sbyte(f
, 0);
385 static int virtio_blk_load(QEMUFile
*f
, void *opaque
, int version_id
)
387 VirtIOBlock
*s
= opaque
;
392 virtio_load(&s
->vdev
, f
);
393 while (qemu_get_sbyte(f
)) {
394 VirtIOBlockReq
*req
= virtio_blk_alloc_request(s
);
395 qemu_get_buffer(f
, (unsigned char*)&req
->elem
, sizeof(req
->elem
));
403 VirtIODevice
*virtio_blk_init(DeviceState
*dev
)
406 int cylinders
, heads
, secs
;
407 static int virtio_blk_id
;
408 BlockDriverState
*bs
;
411 s
= (VirtIOBlock
*)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK
,
412 sizeof(struct virtio_blk_config
),
413 sizeof(VirtIOBlock
));
415 bs
= qdev_init_bdrv(dev
, IF_VIRTIO
);
416 s
->vdev
.get_config
= virtio_blk_update_config
;
417 s
->vdev
.get_features
= virtio_blk_get_features
;
418 s
->vdev
.reset
= virtio_blk_reset
;
421 if (strlen(ps
= (char *)drive_get_serial(bs
)))
422 strncpy(s
->serial_str
, ps
, sizeof(s
->serial_str
));
424 snprintf(s
->serial_str
, sizeof(s
->serial_str
), "0");
426 bdrv_guess_geometry(s
->bs
, &cylinders
, &heads
, &secs
);
427 bdrv_set_geometry_hint(s
->bs
, cylinders
, heads
, secs
);
429 s
->vq
= virtio_add_queue(&s
->vdev
, 128, virtio_blk_handle_output
);
431 qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb
, s
);
432 register_savevm("virtio-blk", virtio_blk_id
++, 2,
433 virtio_blk_save
, virtio_blk_load
, s
);