vfio/ccw/pci: Allow devices to opt-in for ballooning
[qemu/ar7.git] / hw / vfio / ccw.c
blobe96bbdc78b48eafcf1ed04756af1e4a29753d4cc
/*
 * vfio based subchannel assignment support
 *
 * Copyright 2017 IBM Corp.
 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
 *            Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
 *            Pierre Morel <pmorel@linux.vnet.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or (at
 * your option) any later version. See the COPYING file in the top-level
 * directory.
 */
14 #include "qemu/osdep.h"
15 #include <linux/vfio.h>
16 #include <linux/vfio_ccw.h>
17 #include <sys/ioctl.h>
19 #include "qapi/error.h"
20 #include "hw/sysbus.h"
21 #include "hw/vfio/vfio.h"
22 #include "hw/vfio/vfio-common.h"
23 #include "hw/s390x/s390-ccw.h"
24 #include "hw/s390x/ccw-device.h"
25 #include "exec/address-spaces.h"
26 #include "qemu/error-report.h"
28 #define TYPE_VFIO_CCW "vfio-ccw"
29 typedef struct VFIOCCWDevice {
30 S390CCWDevice cdev;
31 VFIODevice vdev;
32 uint64_t io_region_size;
33 uint64_t io_region_offset;
34 struct ccw_io_region *io_region;
35 EventNotifier io_notifier;
36 bool force_orb_pfch;
37 bool warned_orb_pfch;
38 } VFIOCCWDevice;
/*
 * Emit a warning through warn_vreport() at most once per @warned flag.
 *
 * Nothing happens when @warned is NULL or already true; otherwise the flag
 * is set and the printf-style message @fmt is reported.
 */
static inline void warn_once(bool *warned, const char *fmt, ...)
{
    va_list args;

    if (warned && !*warned) {
        *warned = true;
        va_start(args, fmt);
        warn_vreport(fmt, args);
        va_end(args);
    }
}
53 static inline void warn_once_pfch(VFIOCCWDevice *vcdev, SubchDev *sch,
54 const char *msg)
56 warn_once(&vcdev->warned_orb_pfch, "vfio-ccw (devno %x.%x.%04x): %s",
57 sch->cssid, sch->ssid, sch->devno, msg);
60 static void vfio_ccw_compute_needs_reset(VFIODevice *vdev)
62 vdev->needs_reset = false;
66 * We don't need vfio_hot_reset_multi and vfio_eoi operations for
67 * vfio_ccw device now.
69 struct VFIODeviceOps vfio_ccw_ops = {
70 .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset,
73 static IOInstEnding vfio_ccw_handle_request(SubchDev *sch)
75 S390CCWDevice *cdev = sch->driver_data;
76 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
77 struct ccw_io_region *region = vcdev->io_region;
78 int ret;
80 if (!(sch->orb.ctrl0 & ORB_CTRL0_MASK_PFCH)) {
81 if (!(vcdev->force_orb_pfch)) {
82 warn_once_pfch(vcdev, sch, "requires PFCH flag set");
83 sch_gen_unit_exception(sch);
84 css_inject_io_interrupt(sch);
85 return IOINST_CC_EXPECTED;
86 } else {
87 sch->orb.ctrl0 |= ORB_CTRL0_MASK_PFCH;
88 warn_once_pfch(vcdev, sch, "PFCH flag forced");
92 QEMU_BUILD_BUG_ON(sizeof(region->orb_area) != sizeof(ORB));
93 QEMU_BUILD_BUG_ON(sizeof(region->scsw_area) != sizeof(SCSW));
94 QEMU_BUILD_BUG_ON(sizeof(region->irb_area) != sizeof(IRB));
96 memset(region, 0, sizeof(*region));
98 memcpy(region->orb_area, &sch->orb, sizeof(ORB));
99 memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW));
101 again:
102 ret = pwrite(vcdev->vdev.fd, region,
103 vcdev->io_region_size, vcdev->io_region_offset);
104 if (ret != vcdev->io_region_size) {
105 if (errno == EAGAIN) {
106 goto again;
108 error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno);
109 ret = -errno;
110 } else {
111 ret = region->ret_code;
113 switch (ret) {
114 case 0:
115 return IOINST_CC_EXPECTED;
116 case -EBUSY:
117 return IOINST_CC_BUSY;
118 case -ENODEV:
119 case -EACCES:
120 return IOINST_CC_NOT_OPERATIONAL;
121 case -EFAULT:
122 default:
123 sch_gen_unit_exception(sch);
124 css_inject_io_interrupt(sch);
125 return IOINST_CC_EXPECTED;
129 static void vfio_ccw_reset(DeviceState *dev)
131 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
132 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
133 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
135 ioctl(vcdev->vdev.fd, VFIO_DEVICE_RESET);
138 static void vfio_ccw_io_notifier_handler(void *opaque)
140 VFIOCCWDevice *vcdev = opaque;
141 struct ccw_io_region *region = vcdev->io_region;
142 S390CCWDevice *cdev = S390_CCW_DEVICE(vcdev);
143 CcwDevice *ccw_dev = CCW_DEVICE(cdev);
144 SubchDev *sch = ccw_dev->sch;
145 SCSW *s = &sch->curr_status.scsw;
146 PMCW *p = &sch->curr_status.pmcw;
147 IRB irb;
148 int size;
150 if (!event_notifier_test_and_clear(&vcdev->io_notifier)) {
151 return;
154 size = pread(vcdev->vdev.fd, region, vcdev->io_region_size,
155 vcdev->io_region_offset);
156 if (size == -1) {
157 switch (errno) {
158 case ENODEV:
159 /* Generate a deferred cc 3 condition. */
160 s->flags |= SCSW_FLAGS_MASK_CC;
161 s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
162 s->ctrl |= (SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND);
163 goto read_err;
164 case EFAULT:
165 /* Memory problem, generate channel data check. */
166 s->ctrl &= ~SCSW_ACTL_START_PEND;
167 s->cstat = SCSW_CSTAT_DATA_CHECK;
168 s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
169 s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
170 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
171 goto read_err;
172 default:
173 /* Error, generate channel program check. */
174 s->ctrl &= ~SCSW_ACTL_START_PEND;
175 s->cstat = SCSW_CSTAT_PROG_CHECK;
176 s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
177 s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
178 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
179 goto read_err;
181 } else if (size != vcdev->io_region_size) {
182 /* Information transfer error, generate channel-control check. */
183 s->ctrl &= ~SCSW_ACTL_START_PEND;
184 s->cstat = SCSW_CSTAT_CHN_CTRL_CHK;
185 s->ctrl &= ~SCSW_CTRL_MASK_STCTL;
186 s->ctrl |= SCSW_STCTL_PRIMARY | SCSW_STCTL_SECONDARY |
187 SCSW_STCTL_ALERT | SCSW_STCTL_STATUS_PEND;
188 goto read_err;
191 memcpy(&irb, region->irb_area, sizeof(IRB));
193 /* Update control block via irb. */
194 copy_scsw_to_guest(s, &irb.scsw);
196 /* If a uint check is pending, copy sense data. */
197 if ((s->dstat & SCSW_DSTAT_UNIT_CHECK) &&
198 (p->chars & PMCW_CHARS_MASK_CSENSE)) {
199 memcpy(sch->sense_data, irb.ecw, sizeof(irb.ecw));
202 read_err:
203 css_inject_io_interrupt(sch);
206 static void vfio_ccw_register_io_notifier(VFIOCCWDevice *vcdev, Error **errp)
208 VFIODevice *vdev = &vcdev->vdev;
209 struct vfio_irq_info *irq_info;
210 struct vfio_irq_set *irq_set;
211 size_t argsz;
212 int32_t *pfd;
214 if (vdev->num_irqs < VFIO_CCW_IO_IRQ_INDEX + 1) {
215 error_setg(errp, "vfio: unexpected number of io irqs %u",
216 vdev->num_irqs);
217 return;
220 argsz = sizeof(*irq_info);
221 irq_info = g_malloc0(argsz);
222 irq_info->index = VFIO_CCW_IO_IRQ_INDEX;
223 irq_info->argsz = argsz;
224 if (ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO,
225 irq_info) < 0 || irq_info->count < 1) {
226 error_setg_errno(errp, errno, "vfio: Error getting irq info");
227 goto out_free_info;
230 if (event_notifier_init(&vcdev->io_notifier, 0)) {
231 error_setg_errno(errp, errno,
232 "vfio: Unable to init event notifier for IO");
233 goto out_free_info;
236 argsz = sizeof(*irq_set) + sizeof(*pfd);
237 irq_set = g_malloc0(argsz);
238 irq_set->argsz = argsz;
239 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
240 VFIO_IRQ_SET_ACTION_TRIGGER;
241 irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
242 irq_set->start = 0;
243 irq_set->count = 1;
244 pfd = (int32_t *) &irq_set->data;
246 *pfd = event_notifier_get_fd(&vcdev->io_notifier);
247 qemu_set_fd_handler(*pfd, vfio_ccw_io_notifier_handler, NULL, vcdev);
248 if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
249 error_setg(errp, "vfio: Failed to set up io notification");
250 qemu_set_fd_handler(*pfd, NULL, NULL, vcdev);
251 event_notifier_cleanup(&vcdev->io_notifier);
254 g_free(irq_set);
256 out_free_info:
257 g_free(irq_info);
260 static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice *vcdev)
262 struct vfio_irq_set *irq_set;
263 size_t argsz;
264 int32_t *pfd;
266 argsz = sizeof(*irq_set) + sizeof(*pfd);
267 irq_set = g_malloc0(argsz);
268 irq_set->argsz = argsz;
269 irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
270 VFIO_IRQ_SET_ACTION_TRIGGER;
271 irq_set->index = VFIO_CCW_IO_IRQ_INDEX;
272 irq_set->start = 0;
273 irq_set->count = 1;
274 pfd = (int32_t *) &irq_set->data;
275 *pfd = -1;
277 if (ioctl(vcdev->vdev.fd, VFIO_DEVICE_SET_IRQS, irq_set)) {
278 error_report("vfio: Failed to de-assign device io fd: %m");
281 qemu_set_fd_handler(event_notifier_get_fd(&vcdev->io_notifier),
282 NULL, NULL, vcdev);
283 event_notifier_cleanup(&vcdev->io_notifier);
285 g_free(irq_set);
288 static void vfio_ccw_get_region(VFIOCCWDevice *vcdev, Error **errp)
290 VFIODevice *vdev = &vcdev->vdev;
291 struct vfio_region_info *info;
292 int ret;
294 /* Sanity check device */
295 if (!(vdev->flags & VFIO_DEVICE_FLAGS_CCW)) {
296 error_setg(errp, "vfio: Um, this isn't a vfio-ccw device");
297 return;
300 if (vdev->num_regions < VFIO_CCW_CONFIG_REGION_INDEX + 1) {
301 error_setg(errp, "vfio: Unexpected number of the I/O region %u",
302 vdev->num_regions);
303 return;
306 ret = vfio_get_region_info(vdev, VFIO_CCW_CONFIG_REGION_INDEX, &info);
307 if (ret) {
308 error_setg_errno(errp, -ret, "vfio: Error getting config info");
309 return;
312 vcdev->io_region_size = info->size;
313 if (sizeof(*vcdev->io_region) != vcdev->io_region_size) {
314 error_setg(errp, "vfio: Unexpected size of the I/O region");
315 g_free(info);
316 return;
319 vcdev->io_region_offset = info->offset;
320 vcdev->io_region = g_malloc0(info->size);
322 g_free(info);
325 static void vfio_ccw_put_region(VFIOCCWDevice *vcdev)
327 g_free(vcdev->io_region);
330 static void vfio_ccw_put_device(VFIOCCWDevice *vcdev)
332 g_free(vcdev->vdev.name);
333 vfio_put_base_device(&vcdev->vdev);
336 static void vfio_ccw_get_device(VFIOGroup *group, VFIOCCWDevice *vcdev,
337 Error **errp)
339 char *name = g_strdup_printf("%x.%x.%04x", vcdev->cdev.hostid.cssid,
340 vcdev->cdev.hostid.ssid,
341 vcdev->cdev.hostid.devid);
342 VFIODevice *vbasedev;
344 QLIST_FOREACH(vbasedev, &group->device_list, next) {
345 if (strcmp(vbasedev->name, name) == 0) {
346 error_setg(errp, "vfio: subchannel %s has already been attached",
347 name);
348 goto out_err;
353 * All vfio-ccw devices are believed to operate in a way compatible with
354 * memory ballooning, ie. pages pinned in the host are in the current
355 * working set of the guest driver and therefore never overlap with pages
356 * available to the guest balloon driver. This needs to be set before
357 * vfio_get_device() for vfio common to handle the balloon inhibitor.
359 vcdev->vdev.balloon_allowed = true;
361 if (vfio_get_device(group, vcdev->cdev.mdevid, &vcdev->vdev, errp)) {
362 goto out_err;
365 vcdev->vdev.ops = &vfio_ccw_ops;
366 vcdev->vdev.type = VFIO_DEVICE_TYPE_CCW;
367 vcdev->vdev.name = name;
368 vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj;
370 return;
372 out_err:
373 g_free(name);
376 static VFIOGroup *vfio_ccw_get_group(S390CCWDevice *cdev, Error **errp)
378 char *tmp, group_path[PATH_MAX];
379 ssize_t len;
380 int groupid;
382 tmp = g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group",
383 cdev->hostid.cssid, cdev->hostid.ssid,
384 cdev->hostid.devid, cdev->mdevid);
385 len = readlink(tmp, group_path, sizeof(group_path));
386 g_free(tmp);
388 if (len <= 0 || len >= sizeof(group_path)) {
389 error_setg(errp, "vfio: no iommu_group found");
390 return NULL;
393 group_path[len] = 0;
395 if (sscanf(basename(group_path), "%d", &groupid) != 1) {
396 error_setg(errp, "vfio: failed to read %s", group_path);
397 return NULL;
400 return vfio_get_group(groupid, &address_space_memory, errp);
403 static void vfio_ccw_realize(DeviceState *dev, Error **errp)
405 VFIOGroup *group;
406 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
407 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
408 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
409 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
410 Error *err = NULL;
412 /* Call the class init function for subchannel. */
413 if (cdc->realize) {
414 cdc->realize(cdev, vcdev->vdev.sysfsdev, &err);
415 if (err) {
416 goto out_err_propagate;
420 group = vfio_ccw_get_group(cdev, &err);
421 if (!group) {
422 goto out_group_err;
425 vfio_ccw_get_device(group, vcdev, &err);
426 if (err) {
427 goto out_device_err;
430 vfio_ccw_get_region(vcdev, &err);
431 if (err) {
432 goto out_region_err;
435 vfio_ccw_register_io_notifier(vcdev, &err);
436 if (err) {
437 goto out_notifier_err;
440 return;
442 out_notifier_err:
443 vfio_ccw_put_region(vcdev);
444 out_region_err:
445 vfio_ccw_put_device(vcdev);
446 out_device_err:
447 vfio_put_group(group);
448 out_group_err:
449 if (cdc->unrealize) {
450 cdc->unrealize(cdev, NULL);
452 out_err_propagate:
453 error_propagate(errp, err);
456 static void vfio_ccw_unrealize(DeviceState *dev, Error **errp)
458 CcwDevice *ccw_dev = DO_UPCAST(CcwDevice, parent_obj, dev);
459 S390CCWDevice *cdev = DO_UPCAST(S390CCWDevice, parent_obj, ccw_dev);
460 VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
461 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(cdev);
462 VFIOGroup *group = vcdev->vdev.group;
464 vfio_ccw_unregister_io_notifier(vcdev);
465 vfio_ccw_put_region(vcdev);
466 vfio_ccw_put_device(vcdev);
467 vfio_put_group(group);
469 if (cdc->unrealize) {
470 cdc->unrealize(cdev, errp);
474 static Property vfio_ccw_properties[] = {
475 DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice, vdev.sysfsdev),
476 DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice, force_orb_pfch, false),
477 DEFINE_PROP_END_OF_LIST(),
480 static const VMStateDescription vfio_ccw_vmstate = {
481 .name = TYPE_VFIO_CCW,
482 .unmigratable = 1,
485 static void vfio_ccw_class_init(ObjectClass *klass, void *data)
487 DeviceClass *dc = DEVICE_CLASS(klass);
488 S390CCWDeviceClass *cdc = S390_CCW_DEVICE_CLASS(klass);
490 dc->props = vfio_ccw_properties;
491 dc->vmsd = &vfio_ccw_vmstate;
492 dc->desc = "VFIO-based subchannel assignment";
493 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
494 dc->realize = vfio_ccw_realize;
495 dc->unrealize = vfio_ccw_unrealize;
496 dc->reset = vfio_ccw_reset;
498 cdc->handle_request = vfio_ccw_handle_request;
501 static const TypeInfo vfio_ccw_info = {
502 .name = TYPE_VFIO_CCW,
503 .parent = TYPE_S390_CCW,
504 .instance_size = sizeof(VFIOCCWDevice),
505 .class_init = vfio_ccw_class_init,
508 static void register_vfio_ccw_type(void)
510 type_register_static(&vfio_ccw_info);
513 type_init(register_vfio_ccw_type)