/*
 * QEMU PowerPC sPAPR XIVE interrupt controller model
 *
 * Copyright (c) 2017-2019, IBM Corporation.
 *
 * This code is licensed under the GPL version 2 or later. See the
 * COPYING file in the top-level directory.
 */
10 #include "qemu/osdep.h"
12 #include "qemu/error-report.h"
13 #include "qapi/error.h"
14 #include "target/ppc/cpu.h"
15 #include "sysemu/cpus.h"
16 #include "sysemu/kvm.h"
17 #include "hw/ppc/spapr.h"
18 #include "hw/ppc/spapr_cpu_core.h"
19 #include "hw/ppc/spapr_xive.h"
20 #include "hw/ppc/xive.h"
23 #include <sys/ioctl.h>
/*
 * Helpers for CPU hotplug
 *
 * TODO: make a common KVMEnabledCPU layer for XICS and XIVE
 */
30 typedef struct KVMEnabledCPU
{
31 unsigned long vcpu_id
;
32 QLIST_ENTRY(KVMEnabledCPU
) node
;
35 static QLIST_HEAD(, KVMEnabledCPU
)
36 kvm_enabled_cpus
= QLIST_HEAD_INITIALIZER(&kvm_enabled_cpus
);
38 static bool kvm_cpu_is_enabled(CPUState
*cs
)
40 KVMEnabledCPU
*enabled_cpu
;
41 unsigned long vcpu_id
= kvm_arch_vcpu_id(cs
);
43 QLIST_FOREACH(enabled_cpu
, &kvm_enabled_cpus
, node
) {
44 if (enabled_cpu
->vcpu_id
== vcpu_id
) {
51 static void kvm_cpu_enable(CPUState
*cs
)
53 KVMEnabledCPU
*enabled_cpu
;
54 unsigned long vcpu_id
= kvm_arch_vcpu_id(cs
);
56 enabled_cpu
= g_malloc(sizeof(*enabled_cpu
));
57 enabled_cpu
->vcpu_id
= vcpu_id
;
58 QLIST_INSERT_HEAD(&kvm_enabled_cpus
, enabled_cpu
, node
);
61 static void kvm_cpu_disable_all(void)
63 KVMEnabledCPU
*enabled_cpu
, *next
;
65 QLIST_FOREACH_SAFE(enabled_cpu
, &kvm_enabled_cpus
, node
, next
) {
66 QLIST_REMOVE(enabled_cpu
, node
);
/*
 * XIVE Thread Interrupt Management context (KVM)
 */

75 static void kvmppc_xive_cpu_set_state(XiveTCTX
*tctx
, Error
**errp
)
80 /* word0 and word1 of the OS ring. */
81 state
[0] = *((uint64_t *) &tctx
->regs
[TM_QW1_OS
]);
83 ret
= kvm_set_one_reg(tctx
->cs
, KVM_REG_PPC_VP_STATE
, state
);
85 error_setg_errno(errp
, errno
,
86 "XIVE: could not restore KVM state of CPU %ld",
87 kvm_arch_vcpu_id(tctx
->cs
));
91 void kvmppc_xive_cpu_get_state(XiveTCTX
*tctx
, Error
**errp
)
93 uint64_t state
[2] = { 0 };
96 ret
= kvm_get_one_reg(tctx
->cs
, KVM_REG_PPC_VP_STATE
, state
);
98 error_setg_errno(errp
, errno
,
99 "XIVE: could not capture KVM state of CPU %ld",
100 kvm_arch_vcpu_id(tctx
->cs
));
104 /* word0 and word1 of the OS ring. */
105 *((uint64_t *) &tctx
->regs
[TM_QW1_OS
]) = state
[0];
113 static void kvmppc_xive_cpu_do_synchronize_state(CPUState
*cpu
,
116 XiveCpuGetState
*s
= arg
.host_ptr
;
118 kvmppc_xive_cpu_get_state(s
->tctx
, &s
->err
);
121 void kvmppc_xive_cpu_synchronize_state(XiveTCTX
*tctx
, Error
**errp
)
123 XiveCpuGetState s
= {
129 * Kick the vCPU to make sure they are available for the KVM ioctl.
131 run_on_cpu(tctx
->cs
, kvmppc_xive_cpu_do_synchronize_state
,
132 RUN_ON_CPU_HOST_PTR(&s
));
135 error_propagate(errp
, s
.err
);
140 void kvmppc_xive_cpu_connect(XiveTCTX
*tctx
, Error
**errp
)
142 SpaprXive
*xive
= SPAPR_MACHINE(qdev_get_machine())->xive
;
143 unsigned long vcpu_id
;
146 /* Check if CPU was hot unplugged and replugged. */
147 if (kvm_cpu_is_enabled(tctx
->cs
)) {
151 vcpu_id
= kvm_arch_vcpu_id(tctx
->cs
);
153 ret
= kvm_vcpu_enable_cap(tctx
->cs
, KVM_CAP_PPC_IRQ_XIVE
, 0, xive
->fd
,
156 error_setg(errp
, "XIVE: unable to connect CPU%ld to KVM device: %s",
157 vcpu_id
, strerror(errno
));
161 kvm_cpu_enable(tctx
->cs
);
/*
 * XIVE Interrupt Source (KVM)
 */

168 void kvmppc_xive_set_source_config(SpaprXive
*xive
, uint32_t lisn
, XiveEAS
*eas
,
178 Error
*local_err
= NULL
;
180 assert(xive_eas_is_valid(eas
));
182 end_idx
= xive_get_field64(EAS_END_INDEX
, eas
->w
);
183 end_blk
= xive_get_field64(EAS_END_BLOCK
, eas
->w
);
184 eisn
= xive_get_field64(EAS_END_DATA
, eas
->w
);
185 masked
= xive_eas_is_masked(eas
);
187 spapr_xive_end_to_target(end_blk
, end_idx
, &server
, &priority
);
189 kvm_src
= priority
<< KVM_XIVE_SOURCE_PRIORITY_SHIFT
&
190 KVM_XIVE_SOURCE_PRIORITY_MASK
;
191 kvm_src
|= server
<< KVM_XIVE_SOURCE_SERVER_SHIFT
&
192 KVM_XIVE_SOURCE_SERVER_MASK
;
193 kvm_src
|= ((uint64_t) masked
<< KVM_XIVE_SOURCE_MASKED_SHIFT
) &
194 KVM_XIVE_SOURCE_MASKED_MASK
;
195 kvm_src
|= ((uint64_t)eisn
<< KVM_XIVE_SOURCE_EISN_SHIFT
) &
196 KVM_XIVE_SOURCE_EISN_MASK
;
198 kvm_device_access(xive
->fd
, KVM_DEV_XIVE_GRP_SOURCE_CONFIG
, lisn
,
199 &kvm_src
, true, &local_err
);
201 error_propagate(errp
, local_err
);
206 void kvmppc_xive_sync_source(SpaprXive
*xive
, uint32_t lisn
, Error
**errp
)
208 kvm_device_access(xive
->fd
, KVM_DEV_XIVE_GRP_SOURCE_SYNC
, lisn
,
213 * At reset, the interrupt sources are simply created and MASKED. We
214 * only need to inform the KVM XIVE device about their type: LSI or
217 void kvmppc_xive_source_reset_one(XiveSource
*xsrc
, int srcno
, Error
**errp
)
219 SpaprXive
*xive
= SPAPR_XIVE(xsrc
->xive
);
222 if (xive_source_irq_is_lsi(xsrc
, srcno
)) {
223 state
|= KVM_XIVE_LEVEL_SENSITIVE
;
224 if (xsrc
->status
[srcno
] & XIVE_STATUS_ASSERTED
) {
225 state
|= KVM_XIVE_LEVEL_ASSERTED
;
229 kvm_device_access(xive
->fd
, KVM_DEV_XIVE_GRP_SOURCE
, srcno
, &state
,
233 void kvmppc_xive_source_reset(XiveSource
*xsrc
, Error
**errp
)
237 for (i
= 0; i
< xsrc
->nr_irqs
; i
++) {
238 Error
*local_err
= NULL
;
240 kvmppc_xive_source_reset_one(xsrc
, i
, &local_err
);
242 error_propagate(errp
, local_err
);
249 * This is used to perform the magic loads on the ESB pages, described
252 * Memory barriers should not be needed for loads (no store for now).
254 static uint64_t xive_esb_rw(XiveSource
*xsrc
, int srcno
, uint32_t offset
,
255 uint64_t data
, bool write
)
257 uint64_t *addr
= xsrc
->esb_mmap
+ xive_source_esb_mgmt(xsrc
, srcno
) +
261 *addr
= cpu_to_be64(data
);
264 /* Prevent the compiler from optimizing away the load */
265 volatile uint64_t value
= be64_to_cpu(*addr
);
270 static uint8_t xive_esb_read(XiveSource
*xsrc
, int srcno
, uint32_t offset
)
272 return xive_esb_rw(xsrc
, srcno
, offset
, 0, 0) & 0x3;
275 static void xive_esb_trigger(XiveSource
*xsrc
, int srcno
)
277 uint64_t *addr
= xsrc
->esb_mmap
+ xive_source_esb_page(xsrc
, srcno
);
282 uint64_t kvmppc_xive_esb_rw(XiveSource
*xsrc
, int srcno
, uint32_t offset
,
283 uint64_t data
, bool write
)
286 return xive_esb_rw(xsrc
, srcno
, offset
, data
, 1);
290 * Special Load EOI handling for LSI sources. Q bit is never set
291 * and the interrupt should be re-triggered if the level is still
294 if (xive_source_irq_is_lsi(xsrc
, srcno
) &&
295 offset
== XIVE_ESB_LOAD_EOI
) {
296 xive_esb_read(xsrc
, srcno
, XIVE_ESB_SET_PQ_00
);
297 if (xsrc
->status
[srcno
] & XIVE_STATUS_ASSERTED
) {
298 xive_esb_trigger(xsrc
, srcno
);
302 return xive_esb_rw(xsrc
, srcno
, offset
, 0, 0);
306 static void kvmppc_xive_source_get_state(XiveSource
*xsrc
)
310 for (i
= 0; i
< xsrc
->nr_irqs
; i
++) {
311 /* Perform a load without side effect to retrieve the PQ bits */
312 uint8_t pq
= xive_esb_read(xsrc
, i
, XIVE_ESB_GET
);
314 /* and save PQ locally */
315 xive_source_esb_set(xsrc
, i
, pq
);
319 void kvmppc_xive_source_set_irq(void *opaque
, int srcno
, int val
)
321 XiveSource
*xsrc
= opaque
;
322 struct kvm_irq_level args
;
326 if (!xive_source_irq_is_lsi(xsrc
, srcno
)) {
330 args
.level
= KVM_INTERRUPT_SET
;
333 xsrc
->status
[srcno
] |= XIVE_STATUS_ASSERTED
;
334 args
.level
= KVM_INTERRUPT_SET_LEVEL
;
336 xsrc
->status
[srcno
] &= ~XIVE_STATUS_ASSERTED
;
337 args
.level
= KVM_INTERRUPT_UNSET
;
340 rc
= kvm_vm_ioctl(kvm_state
, KVM_IRQ_LINE
, &args
);
342 error_report("XIVE: kvm_irq_line() failed : %s", strerror(errno
));
/*
 * sPAPR XIVE interrupt controller (KVM)
 */
349 void kvmppc_xive_get_queue_config(SpaprXive
*xive
, uint8_t end_blk
,
350 uint32_t end_idx
, XiveEND
*end
,
353 struct kvm_ppc_xive_eq kvm_eq
= { 0 };
357 Error
*local_err
= NULL
;
359 assert(xive_end_is_valid(end
));
361 /* Encode the tuple (server, prio) as a KVM EQ index */
362 spapr_xive_end_to_target(end_blk
, end_idx
, &server
, &priority
);
364 kvm_eq_idx
= priority
<< KVM_XIVE_EQ_PRIORITY_SHIFT
&
365 KVM_XIVE_EQ_PRIORITY_MASK
;
366 kvm_eq_idx
|= server
<< KVM_XIVE_EQ_SERVER_SHIFT
&
367 KVM_XIVE_EQ_SERVER_MASK
;
369 kvm_device_access(xive
->fd
, KVM_DEV_XIVE_GRP_EQ_CONFIG
, kvm_eq_idx
,
370 &kvm_eq
, false, &local_err
);
372 error_propagate(errp
, local_err
);
377 * The EQ index and toggle bit are updated by HW. These are the
378 * only fields from KVM we want to update QEMU with. The other END
379 * fields should already be in the QEMU END table.
381 end
->w1
= xive_set_field32(END_W1_GENERATION
, 0ul, kvm_eq
.qtoggle
) |
382 xive_set_field32(END_W1_PAGE_OFF
, 0ul, kvm_eq
.qindex
);
385 void kvmppc_xive_set_queue_config(SpaprXive
*xive
, uint8_t end_blk
,
386 uint32_t end_idx
, XiveEND
*end
,
389 struct kvm_ppc_xive_eq kvm_eq
= { 0 };
393 Error
*local_err
= NULL
;
396 * Build the KVM state from the local END structure.
400 if (xive_get_field32(END_W0_UCOND_NOTIFY
, end
->w0
)) {
401 kvm_eq
.flags
|= KVM_XIVE_EQ_ALWAYS_NOTIFY
;
405 * If the hcall is disabling the EQ, set the size and page address
406 * to zero. When migrating, only valid ENDs are taken into
409 if (xive_end_is_valid(end
)) {
410 kvm_eq
.qshift
= xive_get_field32(END_W0_QSIZE
, end
->w0
) + 12;
411 kvm_eq
.qaddr
= xive_end_qaddr(end
);
413 * The EQ toggle bit and index should only be relevant when
414 * restoring the EQ state
416 kvm_eq
.qtoggle
= xive_get_field32(END_W1_GENERATION
, end
->w1
);
417 kvm_eq
.qindex
= xive_get_field32(END_W1_PAGE_OFF
, end
->w1
);
423 /* Encode the tuple (server, prio) as a KVM EQ index */
424 spapr_xive_end_to_target(end_blk
, end_idx
, &server
, &priority
);
426 kvm_eq_idx
= priority
<< KVM_XIVE_EQ_PRIORITY_SHIFT
&
427 KVM_XIVE_EQ_PRIORITY_MASK
;
428 kvm_eq_idx
|= server
<< KVM_XIVE_EQ_SERVER_SHIFT
&
429 KVM_XIVE_EQ_SERVER_MASK
;
431 kvm_device_access(xive
->fd
, KVM_DEV_XIVE_GRP_EQ_CONFIG
, kvm_eq_idx
,
432 &kvm_eq
, true, &local_err
);
434 error_propagate(errp
, local_err
);
439 void kvmppc_xive_reset(SpaprXive
*xive
, Error
**errp
)
441 kvm_device_access(xive
->fd
, KVM_DEV_XIVE_GRP_CTRL
, KVM_DEV_XIVE_RESET
,
445 static void kvmppc_xive_get_queues(SpaprXive
*xive
, Error
**errp
)
447 Error
*local_err
= NULL
;
450 for (i
= 0; i
< xive
->nr_ends
; i
++) {
451 if (!xive_end_is_valid(&xive
->endt
[i
])) {
455 kvmppc_xive_get_queue_config(xive
, SPAPR_XIVE_BLOCK_ID
, i
,
456 &xive
->endt
[i
], &local_err
);
458 error_propagate(errp
, local_err
);
465 * The primary goal of the XIVE VM change handler is to mark the EQ
466 * pages dirty when all XIVE event notifications have stopped.
468 * Whenever the VM is stopped, the VM change handler sets the source
469 * PQs to PENDING to stop the flow of events and to possibly catch a
470 * triggered interrupt occuring while the VM is stopped. The previous
471 * state is saved in anticipation of a migration. The XIVE controller
472 * is then synced through KVM to flush any in-flight event
473 * notification and stabilize the EQs.
475 * At this stage, we can mark the EQ page dirty and let a migration
476 * sequence transfer the EQ pages to the destination, which is done
477 * just after the stop state.
479 * The previous configuration of the sources is restored when the VM
480 * runs again. If an interrupt was queued while the VM was stopped,
481 * simply generate a trigger.
483 static void kvmppc_xive_change_state_handler(void *opaque
, int running
,
486 SpaprXive
*xive
= opaque
;
487 XiveSource
*xsrc
= &xive
->source
;
488 Error
*local_err
= NULL
;
492 * Restore the sources to their initial state. This is called when
493 * the VM resumes after a stop or a migration.
496 for (i
= 0; i
< xsrc
->nr_irqs
; i
++) {
497 uint8_t pq
= xive_source_esb_get(xsrc
, i
);
500 old_pq
= xive_esb_read(xsrc
, i
, XIVE_ESB_SET_PQ_00
+ (pq
<< 8));
503 * An interrupt was queued while the VM was stopped,
504 * generate a trigger.
506 if (pq
== XIVE_ESB_RESET
&& old_pq
== XIVE_ESB_QUEUED
) {
507 xive_esb_trigger(xsrc
, i
);
515 * Mask the sources, to stop the flow of event notifications, and
516 * save the PQs locally in the XiveSource object. The XiveSource
517 * state will be collected later on by its vmstate handler if a
518 * migration is in progress.
520 for (i
= 0; i
< xsrc
->nr_irqs
; i
++) {
521 uint8_t pq
= xive_esb_read(xsrc
, i
, XIVE_ESB_GET
);
524 * PQ is set to PENDING to possibly catch a triggered
525 * interrupt occuring while the VM is stopped (hotplug event
528 if (pq
!= XIVE_ESB_OFF
) {
529 pq
= xive_esb_read(xsrc
, i
, XIVE_ESB_SET_PQ_10
);
531 xive_source_esb_set(xsrc
, i
, pq
);
535 * Sync the XIVE controller in KVM, to flush in-flight event
536 * notification that should be enqueued in the EQs and mark the
537 * XIVE EQ pages dirty to collect all updates.
539 kvm_device_access(xive
->fd
, KVM_DEV_XIVE_GRP_CTRL
,
540 KVM_DEV_XIVE_EQ_SYNC
, NULL
, true, &local_err
);
542 error_report_err(local_err
);
547 void kvmppc_xive_synchronize_state(SpaprXive
*xive
, Error
**errp
)
550 * When the VM is stopped, the sources are masked and the previous
551 * state is saved in anticipation of a migration. We should not
552 * synchronize the source state in that case else we will override
555 if (runstate_is_running()) {
556 kvmppc_xive_source_get_state(&xive
->source
);
559 /* EAT: there is no extra state to query from KVM */
562 kvmppc_xive_get_queues(xive
, errp
);
566 * The SpaprXive 'pre_save' method is called by the vmstate handler of
567 * the SpaprXive model, after the XIVE controller is synced in the VM
570 int kvmppc_xive_pre_save(SpaprXive
*xive
)
572 Error
*local_err
= NULL
;
574 /* EAT: there is no extra state to query from KVM */
577 kvmppc_xive_get_queues(xive
, &local_err
);
579 error_report_err(local_err
);
587 * The SpaprXive 'post_load' method is not called by a vmstate
588 * handler. It is called at the sPAPR machine level at the end of the
589 * migration sequence by the sPAPR IRQ backend 'post_load' method,
590 * when all XIVE states have been transferred and loaded.
592 int kvmppc_xive_post_load(SpaprXive
*xive
, int version_id
)
594 Error
*local_err
= NULL
;
598 /* Restore the ENDT first. The targetting depends on it. */
599 for (i
= 0; i
< xive
->nr_ends
; i
++) {
600 if (!xive_end_is_valid(&xive
->endt
[i
])) {
604 kvmppc_xive_set_queue_config(xive
, SPAPR_XIVE_BLOCK_ID
, i
,
605 &xive
->endt
[i
], &local_err
);
607 error_report_err(local_err
);
612 /* Restore the EAT */
613 for (i
= 0; i
< xive
->nr_irqs
; i
++) {
614 if (!xive_eas_is_valid(&xive
->eat
[i
])) {
618 kvmppc_xive_set_source_config(xive
, i
, &xive
->eat
[i
], &local_err
);
620 error_report_err(local_err
);
625 /* Restore the thread interrupt contexts */
627 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
629 kvmppc_xive_cpu_set_state(spapr_cpu_state(cpu
)->tctx
, &local_err
);
631 error_report_err(local_err
);
636 /* The source states will be restored when the machine starts running */
640 static void *kvmppc_xive_mmap(SpaprXive
*xive
, int pgoff
, size_t len
,
644 uint32_t page_shift
= 16; /* TODO: fix page_shift */
646 addr
= mmap(NULL
, len
, PROT_WRITE
| PROT_READ
, MAP_SHARED
, xive
->fd
,
647 pgoff
<< page_shift
);
648 if (addr
== MAP_FAILED
) {
649 error_setg_errno(errp
, errno
, "XIVE: unable to set memory mapping");
657 * All the XIVE memory regions are now backed by mappings from the KVM
660 void kvmppc_xive_connect(SpaprXive
*xive
, Error
**errp
)
662 XiveSource
*xsrc
= &xive
->source
;
663 XiveENDSource
*end_xsrc
= &xive
->end_source
;
664 Error
*local_err
= NULL
;
665 size_t esb_len
= (1ull << xsrc
->esb_shift
) * xsrc
->nr_irqs
;
666 size_t tima_len
= 4ull << TM_SHIFT
;
668 if (!kvmppc_has_cap_xive()) {
669 error_setg(errp
, "IRQ_XIVE capability must be present for KVM");
673 /* First, create the KVM XIVE device */
674 xive
->fd
= kvm_create_device(kvm_state
, KVM_DEV_TYPE_XIVE
, false);
676 error_setg_errno(errp
, -xive
->fd
, "XIVE: error creating KVM device");
681 * 1. Source ESB pages - KVM mapping
683 xsrc
->esb_mmap
= kvmppc_xive_mmap(xive
, KVM_XIVE_ESB_PAGE_OFFSET
, esb_len
,
686 error_propagate(errp
, local_err
);
690 memory_region_init_ram_device_ptr(&xsrc
->esb_mmio
, OBJECT(xsrc
),
691 "xive.esb", esb_len
, xsrc
->esb_mmap
);
692 sysbus_init_mmio(SYS_BUS_DEVICE(xive
), &xsrc
->esb_mmio
);
695 * 2. END ESB pages (No KVM support yet)
697 sysbus_init_mmio(SYS_BUS_DEVICE(xive
), &end_xsrc
->esb_mmio
);
700 * 3. TIMA pages - KVM mapping
702 xive
->tm_mmap
= kvmppc_xive_mmap(xive
, KVM_XIVE_TIMA_PAGE_OFFSET
, tima_len
,
705 error_propagate(errp
, local_err
);
708 memory_region_init_ram_device_ptr(&xive
->tm_mmio
, OBJECT(xive
),
709 "xive.tima", tima_len
, xive
->tm_mmap
);
710 sysbus_init_mmio(SYS_BUS_DEVICE(xive
), &xive
->tm_mmio
);
712 xive
->change
= qemu_add_vm_change_state_handler(
713 kvmppc_xive_change_state_handler
, xive
);
715 kvm_kernel_irqchip
= true;
716 kvm_msi_via_irqfd_allowed
= true;
717 kvm_gsi_direct_mapping
= true;
719 /* Map all regions */
720 spapr_xive_map_mmio(xive
);
723 void kvmppc_xive_disconnect(SpaprXive
*xive
, Error
**errp
)
728 /* The KVM XIVE device is not in use */
729 if (!xive
|| xive
->fd
== -1) {
733 if (!kvmppc_has_cap_xive()) {
734 error_setg(errp
, "IRQ_XIVE capability must be present for KVM");
738 /* Clear the KVM mapping */
739 xsrc
= &xive
->source
;
740 esb_len
= (1ull << xsrc
->esb_shift
) * xsrc
->nr_irqs
;
742 sysbus_mmio_unmap(SYS_BUS_DEVICE(xive
), 0);
743 munmap(xsrc
->esb_mmap
, esb_len
);
745 sysbus_mmio_unmap(SYS_BUS_DEVICE(xive
), 1);
747 sysbus_mmio_unmap(SYS_BUS_DEVICE(xive
), 2);
748 munmap(xive
->tm_mmap
, 4ull << TM_SHIFT
);
751 * When the KVM device fd is closed, the KVM device is destroyed
752 * and removed from the list of devices of the VM. The VCPU
753 * presenters are also detached from the device.
758 kvm_kernel_irqchip
= false;
759 kvm_msi_via_irqfd_allowed
= false;
760 kvm_gsi_direct_mapping
= false;
762 /* Clear the local list of presenter (hotplug) */
763 kvm_cpu_disable_all();
765 /* VM Change state handler is not needed anymore */
766 qemu_del_vm_change_state_handler(xive
->change
);