2 * vfio based subchannel assignment support
4 * Copyright 2017 IBM Corp.
5 * Copyright 2019 Red Hat, Inc.
7 * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
8 * Xiao Feng Ren <renxiaof@linux.vnet.ibm.com>
9 * Pierre Morel <pmorel@linux.vnet.ibm.com>
10 * Cornelia Huck <cohuck@redhat.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or (at
13 * your option) any later version. See the COPYING file in the top-level
17 #include "qemu/osdep.h"
18 #include <linux/vfio.h>
19 #include <linux/vfio_ccw.h>
20 #include <sys/ioctl.h>
22 #include "qapi/error.h"
23 #include "hw/sysbus.h"
24 #include "hw/vfio/vfio.h"
25 #include "hw/vfio/vfio-common.h"
26 #include "hw/s390x/s390-ccw.h"
27 #include "hw/s390x/vfio-ccw.h"
28 #include "hw/qdev-properties.h"
29 #include "hw/s390x/ccw-device.h"
30 #include "exec/address-spaces.h"
31 #include "qemu/error-report.h"
32 #include "qemu/main-loop.h"
33 #include "qemu/module.h"
35 struct VFIOCCWDevice
{
38 uint64_t io_region_size
;
39 uint64_t io_region_offset
;
40 struct ccw_io_region
*io_region
;
41 uint64_t async_cmd_region_size
;
42 uint64_t async_cmd_region_offset
;
43 struct ccw_cmd_region
*async_cmd_region
;
44 uint64_t schib_region_size
;
45 uint64_t schib_region_offset
;
46 struct ccw_schib_region
*schib_region
;
47 EventNotifier io_notifier
;
52 static inline void warn_once_pfch(VFIOCCWDevice
*vcdev
, SubchDev
*sch
,
55 warn_report_once_cond(&vcdev
->warned_orb_pfch
,
56 "vfio-ccw (devno %x.%x.%04x): %s",
57 sch
->cssid
, sch
->ssid
, sch
->devno
, msg
);
60 static void vfio_ccw_compute_needs_reset(VFIODevice
*vdev
)
62 vdev
->needs_reset
= false;
66 * We don't need vfio_hot_reset_multi and vfio_eoi operations for
67 * vfio_ccw device now.
69 struct VFIODeviceOps vfio_ccw_ops
= {
70 .vfio_compute_needs_reset
= vfio_ccw_compute_needs_reset
,
73 static IOInstEnding
vfio_ccw_handle_request(SubchDev
*sch
)
75 S390CCWDevice
*cdev
= sch
->driver_data
;
76 VFIOCCWDevice
*vcdev
= DO_UPCAST(VFIOCCWDevice
, cdev
, cdev
);
77 struct ccw_io_region
*region
= vcdev
->io_region
;
80 if (!(sch
->orb
.ctrl0
& ORB_CTRL0_MASK_PFCH
) && vcdev
->force_orb_pfch
) {
81 sch
->orb
.ctrl0
|= ORB_CTRL0_MASK_PFCH
;
82 warn_once_pfch(vcdev
, sch
, "PFCH flag forced");
85 QEMU_BUILD_BUG_ON(sizeof(region
->orb_area
) != sizeof(ORB
));
86 QEMU_BUILD_BUG_ON(sizeof(region
->scsw_area
) != sizeof(SCSW
));
87 QEMU_BUILD_BUG_ON(sizeof(region
->irb_area
) != sizeof(IRB
));
89 memset(region
, 0, sizeof(*region
));
91 memcpy(region
->orb_area
, &sch
->orb
, sizeof(ORB
));
92 memcpy(region
->scsw_area
, &sch
->curr_status
.scsw
, sizeof(SCSW
));
95 ret
= pwrite(vcdev
->vdev
.fd
, region
,
96 vcdev
->io_region_size
, vcdev
->io_region_offset
);
97 if (ret
!= vcdev
->io_region_size
) {
98 if (errno
== EAGAIN
) {
101 error_report("vfio-ccw: write I/O region failed with errno=%d", errno
);
104 ret
= region
->ret_code
;
108 return IOINST_CC_EXPECTED
;
110 return IOINST_CC_BUSY
;
113 return IOINST_CC_NOT_OPERATIONAL
;
116 sch_gen_unit_exception(sch
);
117 css_inject_io_interrupt(sch
);
118 return IOINST_CC_EXPECTED
;
122 static IOInstEnding
vfio_ccw_handle_store(SubchDev
*sch
)
124 S390CCWDevice
*cdev
= sch
->driver_data
;
125 VFIOCCWDevice
*vcdev
= DO_UPCAST(VFIOCCWDevice
, cdev
, cdev
);
126 SCHIB
*schib
= &sch
->curr_status
;
127 struct ccw_schib_region
*region
= vcdev
->schib_region
;
131 /* schib region not available so nothing else to do */
133 return IOINST_CC_EXPECTED
;
136 memset(region
, 0, sizeof(*region
));
137 ret
= pread(vcdev
->vdev
.fd
, region
, vcdev
->schib_region_size
,
138 vcdev
->schib_region_offset
);
142 * Device is probably damaged, but store subchannel does not
143 * have a nonzero cc defined for this scenario. Log an error,
144 * and presume things are otherwise fine.
146 error_report("vfio-ccw: store region read failed with errno=%d", errno
);
147 return IOINST_CC_EXPECTED
;
151 * Selectively copy path-related bits of the SCHIB,
152 * rather than copying the entire struct.
154 s
= (SCHIB
*)region
->schib_area
;
155 schib
->pmcw
.pnom
= s
->pmcw
.pnom
;
156 schib
->pmcw
.lpum
= s
->pmcw
.lpum
;
157 schib
->pmcw
.pam
= s
->pmcw
.pam
;
158 schib
->pmcw
.pom
= s
->pmcw
.pom
;
160 if (s
->scsw
.flags
& SCSW_FLAGS_MASK_PNO
) {
161 schib
->scsw
.flags
|= SCSW_FLAGS_MASK_PNO
;
164 return IOINST_CC_EXPECTED
;
167 static int vfio_ccw_handle_clear(SubchDev
*sch
)
169 S390CCWDevice
*cdev
= sch
->driver_data
;
170 VFIOCCWDevice
*vcdev
= DO_UPCAST(VFIOCCWDevice
, cdev
, cdev
);
171 struct ccw_cmd_region
*region
= vcdev
->async_cmd_region
;
174 if (!vcdev
->async_cmd_region
) {
175 /* Async command region not available, fall back to emulation */
179 memset(region
, 0, sizeof(*region
));
180 region
->command
= VFIO_CCW_ASYNC_CMD_CSCH
;
183 ret
= pwrite(vcdev
->vdev
.fd
, region
,
184 vcdev
->async_cmd_region_size
, vcdev
->async_cmd_region_offset
);
185 if (ret
!= vcdev
->async_cmd_region_size
) {
186 if (errno
== EAGAIN
) {
189 error_report("vfio-ccw: write cmd region failed with errno=%d", errno
);
192 ret
= region
->ret_code
;
201 sch_gen_unit_exception(sch
);
202 css_inject_io_interrupt(sch
);
207 static int vfio_ccw_handle_halt(SubchDev
*sch
)
209 S390CCWDevice
*cdev
= sch
->driver_data
;
210 VFIOCCWDevice
*vcdev
= DO_UPCAST(VFIOCCWDevice
, cdev
, cdev
);
211 struct ccw_cmd_region
*region
= vcdev
->async_cmd_region
;
214 if (!vcdev
->async_cmd_region
) {
215 /* Async command region not available, fall back to emulation */
219 memset(region
, 0, sizeof(*region
));
220 region
->command
= VFIO_CCW_ASYNC_CMD_HSCH
;
223 ret
= pwrite(vcdev
->vdev
.fd
, region
,
224 vcdev
->async_cmd_region_size
, vcdev
->async_cmd_region_offset
);
225 if (ret
!= vcdev
->async_cmd_region_size
) {
226 if (errno
== EAGAIN
) {
229 error_report("vfio-ccw: write cmd region failed with errno=%d", errno
);
232 ret
= region
->ret_code
;
242 sch_gen_unit_exception(sch
);
243 css_inject_io_interrupt(sch
);
248 static void vfio_ccw_reset(DeviceState
*dev
)
250 CcwDevice
*ccw_dev
= DO_UPCAST(CcwDevice
, parent_obj
, dev
);
251 S390CCWDevice
*cdev
= DO_UPCAST(S390CCWDevice
, parent_obj
, ccw_dev
);
252 VFIOCCWDevice
*vcdev
= DO_UPCAST(VFIOCCWDevice
, cdev
, cdev
);
254 ioctl(vcdev
->vdev
.fd
, VFIO_DEVICE_RESET
);
257 static void vfio_ccw_io_notifier_handler(void *opaque
)
259 VFIOCCWDevice
*vcdev
= opaque
;
260 struct ccw_io_region
*region
= vcdev
->io_region
;
261 S390CCWDevice
*cdev
= S390_CCW_DEVICE(vcdev
);
262 CcwDevice
*ccw_dev
= CCW_DEVICE(cdev
);
263 SubchDev
*sch
= ccw_dev
->sch
;
264 SCHIB
*schib
= &sch
->curr_status
;
269 if (!event_notifier_test_and_clear(&vcdev
->io_notifier
)) {
273 size
= pread(vcdev
->vdev
.fd
, region
, vcdev
->io_region_size
,
274 vcdev
->io_region_offset
);
278 /* Generate a deferred cc 3 condition. */
279 schib
->scsw
.flags
|= SCSW_FLAGS_MASK_CC
;
280 schib
->scsw
.ctrl
&= ~SCSW_CTRL_MASK_STCTL
;
281 schib
->scsw
.ctrl
|= (SCSW_STCTL_ALERT
| SCSW_STCTL_STATUS_PEND
);
284 /* Memory problem, generate channel data check. */
285 schib
->scsw
.ctrl
&= ~SCSW_ACTL_START_PEND
;
286 schib
->scsw
.cstat
= SCSW_CSTAT_DATA_CHECK
;
287 schib
->scsw
.ctrl
&= ~SCSW_CTRL_MASK_STCTL
;
288 schib
->scsw
.ctrl
|= SCSW_STCTL_PRIMARY
| SCSW_STCTL_SECONDARY
|
289 SCSW_STCTL_ALERT
| SCSW_STCTL_STATUS_PEND
;
292 /* Error, generate channel program check. */
293 schib
->scsw
.ctrl
&= ~SCSW_ACTL_START_PEND
;
294 schib
->scsw
.cstat
= SCSW_CSTAT_PROG_CHECK
;
295 schib
->scsw
.ctrl
&= ~SCSW_CTRL_MASK_STCTL
;
296 schib
->scsw
.ctrl
|= SCSW_STCTL_PRIMARY
| SCSW_STCTL_SECONDARY
|
297 SCSW_STCTL_ALERT
| SCSW_STCTL_STATUS_PEND
;
300 } else if (size
!= vcdev
->io_region_size
) {
301 /* Information transfer error, generate channel-control check. */
302 schib
->scsw
.ctrl
&= ~SCSW_ACTL_START_PEND
;
303 schib
->scsw
.cstat
= SCSW_CSTAT_CHN_CTRL_CHK
;
304 schib
->scsw
.ctrl
&= ~SCSW_CTRL_MASK_STCTL
;
305 schib
->scsw
.ctrl
|= SCSW_STCTL_PRIMARY
| SCSW_STCTL_SECONDARY
|
306 SCSW_STCTL_ALERT
| SCSW_STCTL_STATUS_PEND
;
310 memcpy(&irb
, region
->irb_area
, sizeof(IRB
));
312 /* Update control block via irb. */
314 copy_scsw_to_guest(&s
, &irb
.scsw
);
317 /* If a uint check is pending, copy sense data. */
318 if ((schib
->scsw
.dstat
& SCSW_DSTAT_UNIT_CHECK
) &&
319 (schib
->pmcw
.chars
& PMCW_CHARS_MASK_CSENSE
)) {
320 memcpy(sch
->sense_data
, irb
.ecw
, sizeof(irb
.ecw
));
324 css_inject_io_interrupt(sch
);
327 static void vfio_ccw_register_io_notifier(VFIOCCWDevice
*vcdev
, Error
**errp
)
329 VFIODevice
*vdev
= &vcdev
->vdev
;
330 struct vfio_irq_info
*irq_info
;
334 if (vdev
->num_irqs
< VFIO_CCW_IO_IRQ_INDEX
+ 1) {
335 error_setg(errp
, "vfio: unexpected number of io irqs %u",
340 argsz
= sizeof(*irq_info
);
341 irq_info
= g_malloc0(argsz
);
342 irq_info
->index
= VFIO_CCW_IO_IRQ_INDEX
;
343 irq_info
->argsz
= argsz
;
344 if (ioctl(vdev
->fd
, VFIO_DEVICE_GET_IRQ_INFO
,
345 irq_info
) < 0 || irq_info
->count
< 1) {
346 error_setg_errno(errp
, errno
, "vfio: Error getting irq info");
350 if (event_notifier_init(&vcdev
->io_notifier
, 0)) {
351 error_setg_errno(errp
, errno
,
352 "vfio: Unable to init event notifier for IO");
356 fd
= event_notifier_get_fd(&vcdev
->io_notifier
);
357 qemu_set_fd_handler(fd
, vfio_ccw_io_notifier_handler
, NULL
, vcdev
);
359 if (vfio_set_irq_signaling(vdev
, VFIO_CCW_IO_IRQ_INDEX
, 0,
360 VFIO_IRQ_SET_ACTION_TRIGGER
, fd
, errp
)) {
361 qemu_set_fd_handler(fd
, NULL
, NULL
, vcdev
);
362 event_notifier_cleanup(&vcdev
->io_notifier
);
369 static void vfio_ccw_unregister_io_notifier(VFIOCCWDevice
*vcdev
)
373 if (vfio_set_irq_signaling(&vcdev
->vdev
, VFIO_CCW_IO_IRQ_INDEX
, 0,
374 VFIO_IRQ_SET_ACTION_TRIGGER
, -1, &err
)) {
375 error_reportf_err(err
, VFIO_MSG_PREFIX
, vcdev
->vdev
.name
);
378 qemu_set_fd_handler(event_notifier_get_fd(&vcdev
->io_notifier
),
380 event_notifier_cleanup(&vcdev
->io_notifier
);
383 static void vfio_ccw_get_region(VFIOCCWDevice
*vcdev
, Error
**errp
)
385 VFIODevice
*vdev
= &vcdev
->vdev
;
386 struct vfio_region_info
*info
;
389 /* Sanity check device */
390 if (!(vdev
->flags
& VFIO_DEVICE_FLAGS_CCW
)) {
391 error_setg(errp
, "vfio: Um, this isn't a vfio-ccw device");
396 * We always expect at least the I/O region to be present. We also
397 * may have a variable number of regions governed by capabilities.
399 if (vdev
->num_regions
< VFIO_CCW_CONFIG_REGION_INDEX
+ 1) {
400 error_setg(errp
, "vfio: too few regions (%u), expected at least %u",
401 vdev
->num_regions
, VFIO_CCW_CONFIG_REGION_INDEX
+ 1);
405 ret
= vfio_get_region_info(vdev
, VFIO_CCW_CONFIG_REGION_INDEX
, &info
);
407 error_setg_errno(errp
, -ret
, "vfio: Error getting config info");
411 vcdev
->io_region_size
= info
->size
;
412 if (sizeof(*vcdev
->io_region
) != vcdev
->io_region_size
) {
413 error_setg(errp
, "vfio: Unexpected size of the I/O region");
417 vcdev
->io_region_offset
= info
->offset
;
418 vcdev
->io_region
= g_malloc0(info
->size
);
420 /* check for the optional async command region */
421 ret
= vfio_get_dev_region_info(vdev
, VFIO_REGION_TYPE_CCW
,
422 VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD
, &info
);
424 vcdev
->async_cmd_region_size
= info
->size
;
425 if (sizeof(*vcdev
->async_cmd_region
) != vcdev
->async_cmd_region_size
) {
426 error_setg(errp
, "vfio: Unexpected size of the async cmd region");
429 vcdev
->async_cmd_region_offset
= info
->offset
;
430 vcdev
->async_cmd_region
= g_malloc0(info
->size
);
433 ret
= vfio_get_dev_region_info(vdev
, VFIO_REGION_TYPE_CCW
,
434 VFIO_REGION_SUBTYPE_CCW_SCHIB
, &info
);
436 vcdev
->schib_region_size
= info
->size
;
437 if (sizeof(*vcdev
->schib_region
) != vcdev
->schib_region_size
) {
438 error_setg(errp
, "vfio: Unexpected size of the schib region");
441 vcdev
->schib_region_offset
= info
->offset
;
442 vcdev
->schib_region
= g_malloc(info
->size
);
449 g_free(vcdev
->schib_region
);
450 g_free(vcdev
->async_cmd_region
);
451 g_free(vcdev
->io_region
);
456 static void vfio_ccw_put_region(VFIOCCWDevice
*vcdev
)
458 g_free(vcdev
->schib_region
);
459 g_free(vcdev
->async_cmd_region
);
460 g_free(vcdev
->io_region
);
463 static void vfio_ccw_put_device(VFIOCCWDevice
*vcdev
)
465 g_free(vcdev
->vdev
.name
);
466 vfio_put_base_device(&vcdev
->vdev
);
469 static void vfio_ccw_get_device(VFIOGroup
*group
, VFIOCCWDevice
*vcdev
,
472 char *name
= g_strdup_printf("%x.%x.%04x", vcdev
->cdev
.hostid
.cssid
,
473 vcdev
->cdev
.hostid
.ssid
,
474 vcdev
->cdev
.hostid
.devid
);
475 VFIODevice
*vbasedev
;
477 QLIST_FOREACH(vbasedev
, &group
->device_list
, next
) {
478 if (strcmp(vbasedev
->name
, name
) == 0) {
479 error_setg(errp
, "vfio: subchannel %s has already been attached",
486 * All vfio-ccw devices are believed to operate in a way compatible with
487 * memory ballooning, ie. pages pinned in the host are in the current
488 * working set of the guest driver and therefore never overlap with pages
489 * available to the guest balloon driver. This needs to be set before
490 * vfio_get_device() for vfio common to handle the balloon inhibitor.
492 vcdev
->vdev
.balloon_allowed
= true;
494 if (vfio_get_device(group
, vcdev
->cdev
.mdevid
, &vcdev
->vdev
, errp
)) {
498 vcdev
->vdev
.ops
= &vfio_ccw_ops
;
499 vcdev
->vdev
.type
= VFIO_DEVICE_TYPE_CCW
;
500 vcdev
->vdev
.name
= name
;
501 vcdev
->vdev
.dev
= &vcdev
->cdev
.parent_obj
.parent_obj
;
509 static VFIOGroup
*vfio_ccw_get_group(S390CCWDevice
*cdev
, Error
**errp
)
511 char *tmp
, group_path
[PATH_MAX
];
515 tmp
= g_strdup_printf("/sys/bus/css/devices/%x.%x.%04x/%s/iommu_group",
516 cdev
->hostid
.cssid
, cdev
->hostid
.ssid
,
517 cdev
->hostid
.devid
, cdev
->mdevid
);
518 len
= readlink(tmp
, group_path
, sizeof(group_path
));
521 if (len
<= 0 || len
>= sizeof(group_path
)) {
522 error_setg(errp
, "vfio: no iommu_group found");
528 if (sscanf(basename(group_path
), "%d", &groupid
) != 1) {
529 error_setg(errp
, "vfio: failed to read %s", group_path
);
533 return vfio_get_group(groupid
, &address_space_memory
, errp
);
536 static void vfio_ccw_realize(DeviceState
*dev
, Error
**errp
)
539 CcwDevice
*ccw_dev
= DO_UPCAST(CcwDevice
, parent_obj
, dev
);
540 S390CCWDevice
*cdev
= DO_UPCAST(S390CCWDevice
, parent_obj
, ccw_dev
);
541 VFIOCCWDevice
*vcdev
= DO_UPCAST(VFIOCCWDevice
, cdev
, cdev
);
542 S390CCWDeviceClass
*cdc
= S390_CCW_DEVICE_GET_CLASS(cdev
);
545 /* Call the class init function for subchannel. */
547 cdc
->realize(cdev
, vcdev
->vdev
.sysfsdev
, &err
);
549 goto out_err_propagate
;
553 group
= vfio_ccw_get_group(cdev
, &err
);
558 vfio_ccw_get_device(group
, vcdev
, &err
);
563 vfio_ccw_get_region(vcdev
, &err
);
568 vfio_ccw_register_io_notifier(vcdev
, &err
);
570 goto out_notifier_err
;
576 vfio_ccw_put_region(vcdev
);
578 vfio_ccw_put_device(vcdev
);
580 vfio_put_group(group
);
582 if (cdc
->unrealize
) {
583 cdc
->unrealize(cdev
);
586 error_propagate(errp
, err
);
589 static void vfio_ccw_unrealize(DeviceState
*dev
)
591 CcwDevice
*ccw_dev
= DO_UPCAST(CcwDevice
, parent_obj
, dev
);
592 S390CCWDevice
*cdev
= DO_UPCAST(S390CCWDevice
, parent_obj
, ccw_dev
);
593 VFIOCCWDevice
*vcdev
= DO_UPCAST(VFIOCCWDevice
, cdev
, cdev
);
594 S390CCWDeviceClass
*cdc
= S390_CCW_DEVICE_GET_CLASS(cdev
);
595 VFIOGroup
*group
= vcdev
->vdev
.group
;
597 vfio_ccw_unregister_io_notifier(vcdev
);
598 vfio_ccw_put_region(vcdev
);
599 vfio_ccw_put_device(vcdev
);
600 vfio_put_group(group
);
602 if (cdc
->unrealize
) {
603 cdc
->unrealize(cdev
);
607 static Property vfio_ccw_properties
[] = {
608 DEFINE_PROP_STRING("sysfsdev", VFIOCCWDevice
, vdev
.sysfsdev
),
609 DEFINE_PROP_BOOL("force-orb-pfch", VFIOCCWDevice
, force_orb_pfch
, false),
610 DEFINE_PROP_END_OF_LIST(),
613 static const VMStateDescription vfio_ccw_vmstate
= {
618 static void vfio_ccw_class_init(ObjectClass
*klass
, void *data
)
620 DeviceClass
*dc
= DEVICE_CLASS(klass
);
621 S390CCWDeviceClass
*cdc
= S390_CCW_DEVICE_CLASS(klass
);
623 device_class_set_props(dc
, vfio_ccw_properties
);
624 dc
->vmsd
= &vfio_ccw_vmstate
;
625 dc
->desc
= "VFIO-based subchannel assignment";
626 set_bit(DEVICE_CATEGORY_MISC
, dc
->categories
);
627 dc
->realize
= vfio_ccw_realize
;
628 dc
->unrealize
= vfio_ccw_unrealize
;
629 dc
->reset
= vfio_ccw_reset
;
631 cdc
->handle_request
= vfio_ccw_handle_request
;
632 cdc
->handle_halt
= vfio_ccw_handle_halt
;
633 cdc
->handle_clear
= vfio_ccw_handle_clear
;
634 cdc
->handle_store
= vfio_ccw_handle_store
;
637 static const TypeInfo vfio_ccw_info
= {
638 .name
= TYPE_VFIO_CCW
,
639 .parent
= TYPE_S390_CCW
,
640 .instance_size
= sizeof(VFIOCCWDevice
),
641 .class_init
= vfio_ccw_class_init
,
644 static void register_vfio_ccw_type(void)
646 type_register_static(&vfio_ccw_info
);
649 type_init(register_vfio_ccw_type
)