Convert machine registration to use module init functions
[qemu.git] / hw / virtio-blk.c
blob8dd3c7ac52d9719d3a4382f4a6720cf6316ef363
/*
 * Virtio Block Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */
14 #include <qemu-common.h>
15 #include <sysemu.h>
16 #include "virtio-blk.h"
17 #include "block_int.h"
18 #ifdef __linux__
19 # include <scsi/sg.h>
20 #endif
22 typedef struct VirtIOBlock
24 VirtIODevice vdev;
25 BlockDriverState *bs;
26 VirtQueue *vq;
27 void *rq;
28 } VirtIOBlock;
30 static VirtIOBlock *to_virtio_blk(VirtIODevice *vdev)
32 return (VirtIOBlock *)vdev;
35 typedef struct VirtIOBlockReq
37 VirtIOBlock *dev;
38 VirtQueueElement elem;
39 struct virtio_blk_inhdr *in;
40 struct virtio_blk_outhdr *out;
41 struct virtio_scsi_inhdr *scsi;
42 QEMUIOVector qiov;
43 struct VirtIOBlockReq *next;
44 } VirtIOBlockReq;
46 static void virtio_blk_req_complete(VirtIOBlockReq *req, int status)
48 VirtIOBlock *s = req->dev;
50 req->in->status = status;
51 virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in));
52 virtio_notify(&s->vdev, s->vq);
54 qemu_free(req);
57 static int virtio_blk_handle_write_error(VirtIOBlockReq *req, int error)
59 BlockInterfaceErrorAction action = drive_get_onerror(req->dev->bs);
60 VirtIOBlock *s = req->dev;
62 if (action == BLOCK_ERR_IGNORE)
63 return 0;
65 if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
66 || action == BLOCK_ERR_STOP_ANY) {
67 req->next = s->rq;
68 s->rq = req;
69 vm_stop(0);
70 } else {
71 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
74 return 1;
77 static void virtio_blk_rw_complete(void *opaque, int ret)
79 VirtIOBlockReq *req = opaque;
81 if (ret && (req->out->type & VIRTIO_BLK_T_OUT)) {
82 if (virtio_blk_handle_write_error(req, -ret))
83 return;
86 virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
89 static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s)
91 VirtIOBlockReq *req = qemu_mallocz(sizeof(*req));
92 req->dev = s;
93 return req;
96 static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s)
98 VirtIOBlockReq *req = virtio_blk_alloc_request(s);
100 if (req != NULL) {
101 if (!virtqueue_pop(s->vq, &req->elem)) {
102 qemu_free(req);
103 return NULL;
107 return req;
110 #ifdef __linux__
111 static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
113 struct sg_io_hdr hdr;
114 int ret, size = 0;
115 int status;
116 int i;
119 * We require at least one output segment each for the virtio_blk_outhdr
120 * and the SCSI command block.
122 * We also at least require the virtio_blk_inhdr, the virtio_scsi_inhdr
123 * and the sense buffer pointer in the input segments.
125 if (req->elem.out_num < 2 || req->elem.in_num < 3) {
126 virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
127 return;
131 * No support for bidirection commands yet.
133 if (req->elem.out_num > 2 && req->elem.in_num > 3) {
134 virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
135 return;
139 * The scsi inhdr is placed in the second-to-last input segment, just
140 * before the regular inhdr.
142 req->scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base;
143 size = sizeof(*req->in) + sizeof(*req->scsi);
145 memset(&hdr, 0, sizeof(struct sg_io_hdr));
146 hdr.interface_id = 'S';
147 hdr.cmd_len = req->elem.out_sg[1].iov_len;
148 hdr.cmdp = req->elem.out_sg[1].iov_base;
149 hdr.dxfer_len = 0;
151 if (req->elem.out_num > 2) {
153 * If there are more than the minimally required 2 output segments
154 * there is write payload starting from the third iovec.
156 hdr.dxfer_direction = SG_DXFER_TO_DEV;
157 hdr.iovec_count = req->elem.out_num - 2;
159 for (i = 0; i < hdr.iovec_count; i++)
160 hdr.dxfer_len += req->elem.out_sg[i + 2].iov_len;
162 hdr.dxferp = req->elem.out_sg + 2;
164 } else if (req->elem.in_num > 3) {
166 * If we have more than 3 input segments the guest wants to actually
167 * read data.
169 hdr.dxfer_direction = SG_DXFER_FROM_DEV;
170 hdr.iovec_count = req->elem.in_num - 3;
171 for (i = 0; i < hdr.iovec_count; i++)
172 hdr.dxfer_len += req->elem.in_sg[i].iov_len;
174 hdr.dxferp = req->elem.in_sg;
175 size += hdr.dxfer_len;
176 } else {
178 * Some SCSI commands don't actually transfer any data.
180 hdr.dxfer_direction = SG_DXFER_NONE;
183 hdr.sbp = req->elem.in_sg[req->elem.in_num - 3].iov_base;
184 hdr.mx_sb_len = req->elem.in_sg[req->elem.in_num - 3].iov_len;
185 size += hdr.mx_sb_len;
187 ret = bdrv_ioctl(req->dev->bs, SG_IO, &hdr);
188 if (ret) {
189 status = VIRTIO_BLK_S_UNSUPP;
190 hdr.status = ret;
191 hdr.resid = hdr.dxfer_len;
192 } else if (hdr.status) {
193 status = VIRTIO_BLK_S_IOERR;
194 } else {
195 status = VIRTIO_BLK_S_OK;
198 req->scsi->errors = hdr.status;
199 req->scsi->residual = hdr.resid;
200 req->scsi->sense_len = hdr.sb_len_wr;
201 req->scsi->data_len = hdr.dxfer_len;
203 virtio_blk_req_complete(req, status);
205 #else
206 static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
208 virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
210 #endif /* __linux__ */
212 static void virtio_blk_handle_write(VirtIOBlockReq *req)
214 bdrv_aio_writev(req->dev->bs, req->out->sector, &req->qiov,
215 req->qiov.size / 512, virtio_blk_rw_complete, req);
218 static void virtio_blk_handle_read(VirtIOBlockReq *req)
220 bdrv_aio_readv(req->dev->bs, req->out->sector, &req->qiov,
221 req->qiov.size / 512, virtio_blk_rw_complete, req);
224 static void virtio_blk_handle_output(VirtIODevice *vdev, VirtQueue *vq)
226 VirtIOBlock *s = to_virtio_blk(vdev);
227 VirtIOBlockReq *req;
229 while ((req = virtio_blk_get_request(s))) {
230 if (req->elem.out_num < 1 || req->elem.in_num < 1) {
231 fprintf(stderr, "virtio-blk missing headers\n");
232 exit(1);
235 if (req->elem.out_sg[0].iov_len < sizeof(*req->out) ||
236 req->elem.in_sg[req->elem.in_num - 1].iov_len < sizeof(*req->in)) {
237 fprintf(stderr, "virtio-blk header not in correct element\n");
238 exit(1);
241 req->out = (void *)req->elem.out_sg[0].iov_base;
242 req->in = (void *)req->elem.in_sg[req->elem.in_num - 1].iov_base;
244 if (req->out->type & VIRTIO_BLK_T_SCSI_CMD) {
245 virtio_blk_handle_scsi(req);
246 } else if (req->out->type & VIRTIO_BLK_T_OUT) {
247 qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
248 req->elem.out_num - 1);
249 virtio_blk_handle_write(req);
250 } else {
251 qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
252 req->elem.in_num - 1);
253 virtio_blk_handle_read(req);
257 * FIXME: Want to check for completions before returning to guest mode,
258 * so cached reads and writes are reported as quickly as possible. But
259 * that should be done in the generic block layer.
263 static void virtio_blk_dma_restart_cb(void *opaque, int running, int reason)
265 VirtIOBlock *s = opaque;
266 VirtIOBlockReq *req = s->rq;
268 if (!running)
269 return;
271 s->rq = NULL;
273 while (req) {
274 virtio_blk_handle_write(req);
275 req = req->next;
279 static void virtio_blk_reset(VirtIODevice *vdev)
282 * This should cancel pending requests, but can't do nicely until there
283 * are per-device request lists.
285 qemu_aio_flush();
288 static void virtio_blk_update_config(VirtIODevice *vdev, uint8_t *config)
290 VirtIOBlock *s = to_virtio_blk(vdev);
291 struct virtio_blk_config blkcfg;
292 uint64_t capacity;
293 int cylinders, heads, secs;
295 bdrv_get_geometry(s->bs, &capacity);
296 bdrv_get_geometry_hint(s->bs, &cylinders, &heads, &secs);
297 stq_raw(&blkcfg.capacity, capacity);
298 stl_raw(&blkcfg.seg_max, 128 - 2);
299 stw_raw(&blkcfg.cylinders, cylinders);
300 blkcfg.heads = heads;
301 blkcfg.sectors = secs;
302 memcpy(config, &blkcfg, sizeof(blkcfg));
305 static uint32_t virtio_blk_get_features(VirtIODevice *vdev)
307 uint32_t features = 0;
309 features |= (1 << VIRTIO_BLK_F_SEG_MAX);
310 features |= (1 << VIRTIO_BLK_F_GEOMETRY);
311 #ifdef __linux__
312 features |= (1 << VIRTIO_BLK_F_SCSI);
313 #endif
315 return features;
318 static void virtio_blk_save(QEMUFile *f, void *opaque)
320 VirtIOBlock *s = opaque;
321 VirtIOBlockReq *req = s->rq;
323 virtio_save(&s->vdev, f);
325 while (req) {
326 qemu_put_sbyte(f, 1);
327 qemu_put_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
328 req = req->next;
330 qemu_put_sbyte(f, 0);
333 static int virtio_blk_load(QEMUFile *f, void *opaque, int version_id)
335 VirtIOBlock *s = opaque;
337 if (version_id != 2)
338 return -EINVAL;
340 virtio_load(&s->vdev, f);
341 while (qemu_get_sbyte(f)) {
342 VirtIOBlockReq *req = virtio_blk_alloc_request(s);
343 qemu_get_buffer(f, (unsigned char*)&req->elem, sizeof(req->elem));
344 req->next = s->rq;
345 s->rq = req->next;
348 return 0;
351 VirtIODevice *virtio_blk_init(DeviceState *dev)
353 VirtIOBlock *s;
354 int cylinders, heads, secs;
355 static int virtio_blk_id;
356 BlockDriverState *bs;
358 s = (VirtIOBlock *)virtio_common_init("virtio-blk", VIRTIO_ID_BLOCK,
359 sizeof(struct virtio_blk_config),
360 sizeof(VirtIOBlock));
362 bs = qdev_init_bdrv(dev, IF_VIRTIO);
363 s->vdev.get_config = virtio_blk_update_config;
364 s->vdev.get_features = virtio_blk_get_features;
365 s->vdev.reset = virtio_blk_reset;
366 s->bs = bs;
367 s->rq = NULL;
368 bs->private = dev;
369 bdrv_guess_geometry(s->bs, &cylinders, &heads, &secs);
370 bdrv_set_geometry_hint(s->bs, cylinders, heads, secs);
372 s->vq = virtio_add_queue(&s->vdev, 128, virtio_blk_handle_output);
374 qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
375 register_savevm("virtio-blk", virtio_blk_id++, 2,
376 virtio_blk_save, virtio_blk_load, s);
378 return &s->vdev;