2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
50 #include "sysemu/kvm_int.h"
55 #define DPRINTF(fmt, ...) \
56 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #define DPRINTF(fmt, ...) \
62 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
64 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
68 static int cap_interrupt_unset
= false;
69 static int cap_interrupt_level
= false;
70 static int cap_segstate
;
71 static int cap_booke_sregs
;
72 static int cap_ppc_smt
;
73 static int cap_ppc_smt_possible
;
74 static int cap_spapr_tce
;
75 static int cap_spapr_tce_64
;
76 static int cap_spapr_multitce
;
77 static int cap_spapr_vfio
;
79 static int cap_one_reg
;
81 static int cap_ppc_watchdog
;
83 static int cap_htab_fd
;
84 static int cap_fixup_hcalls
;
85 static int cap_htm
; /* Hardware transactional memory support */
86 static int cap_mmu_radix
;
87 static int cap_mmu_hash_v3
;
88 static int cap_resize_hpt
;
89 static int cap_ppc_pvr_compat
;
90 static int cap_ppc_safe_cache
;
91 static int cap_ppc_safe_bounds_check
;
92 static int cap_ppc_safe_indirect_branch
;
93 static int cap_ppc_nested_kvm_hv
;
95 static uint32_t debug_inst_opcode
;
97 /* XXX We have a race condition where we actually have a level triggered
98 * interrupt, but the infrastructure can't expose that yet, so the guest
99 * takes but ignores it, goes to sleep and never gets notified that there's
100 * still an interrupt pending.
102 * As a quick workaround, let's just wake up again 20 ms after we injected
103 * an interrupt. That way we can ensure that we're always reinjecting
104 * interrupts in case the guest swallowed them.
106 static QEMUTimer
*idle_timer
;
108 static void kvm_kick_cpu(void *opaque
)
110 PowerPCCPU
*cpu
= opaque
;
112 qemu_cpu_kick(CPU(cpu
));
115 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
116 * should only be used for fallback tests - generally we should use
117 * explicit capabilities for the features we want, rather than
118 * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState
*ks
)
121 /* Assume KVM-PR if the GET_PVINFO capability is available */
122 return kvm_vm_check_extension(ks
, KVM_CAP_PPC_GET_PVINFO
) != 0;
125 static int kvm_ppc_register_host_cpu_type(MachineState
*ms
);
126 static void kvmppc_get_cpu_characteristics(KVMState
*s
);
128 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
130 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
131 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
132 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
133 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
134 cap_ppc_smt_possible
= kvm_vm_check_extension(s
, KVM_CAP_PPC_SMT_POSSIBLE
);
135 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
136 cap_spapr_tce_64
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE_64
);
137 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
138 cap_spapr_vfio
= kvm_vm_check_extension(s
, KVM_CAP_SPAPR_TCE_VFIO
);
139 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
140 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
141 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
142 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
143 /* Note: we don't set cap_papr here, because this capability is
144 * only activated after this by kvmppc_set_papr() */
145 cap_htab_fd
= kvm_vm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
146 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
147 cap_ppc_smt
= kvm_vm_check_extension(s
, KVM_CAP_PPC_SMT
);
148 cap_htm
= kvm_vm_check_extension(s
, KVM_CAP_PPC_HTM
);
149 cap_mmu_radix
= kvm_vm_check_extension(s
, KVM_CAP_PPC_MMU_RADIX
);
150 cap_mmu_hash_v3
= kvm_vm_check_extension(s
, KVM_CAP_PPC_MMU_HASH_V3
);
151 cap_resize_hpt
= kvm_vm_check_extension(s
, KVM_CAP_SPAPR_RESIZE_HPT
);
152 kvmppc_get_cpu_characteristics(s
);
153 cap_ppc_nested_kvm_hv
= kvm_vm_check_extension(s
, KVM_CAP_PPC_NESTED_HV
);
155 * Note: setting it to false because there is not such capability
156 * in KVM at this moment.
158 * TODO: call kvm_vm_check_extension() with the right capability
159 * after the kernel starts implementing it.*/
160 cap_ppc_pvr_compat
= false;
162 if (!cap_interrupt_level
) {
163 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
164 "VM to stall at times!\n");
167 kvm_ppc_register_host_cpu_type(ms
);
172 int kvm_arch_irqchip_create(MachineState
*ms
, KVMState
*s
)
177 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
179 CPUPPCState
*cenv
= &cpu
->env
;
180 CPUState
*cs
= CPU(cpu
);
181 struct kvm_sregs sregs
;
184 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
185 /* What we're really trying to say is "if we're on BookE, we use
186 the native PVR for now". This is the only sane way to check
187 it though, so we potentially confuse users that they can run
188 BookE guests on BookS. Let's hope nobody dares enough :) */
192 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
197 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
202 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
203 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
206 /* Set up a shared TLB array with KVM */
207 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
209 CPUPPCState
*env
= &cpu
->env
;
210 CPUState
*cs
= CPU(cpu
);
211 struct kvm_book3e_206_tlb_params params
= {};
212 struct kvm_config_tlb cfg
= {};
213 unsigned int entries
= 0;
216 if (!kvm_enabled() ||
217 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
221 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
223 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
224 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
225 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
226 entries
+= params
.tlb_sizes
[i
];
229 assert(entries
== env
->nb_tlb
);
230 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
232 env
->tlb_dirty
= true;
234 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
235 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
236 cfg
.params
= (uintptr_t)¶ms
;
237 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
239 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
241 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
242 __func__
, strerror(-ret
));
246 env
->kvm_sw_tlb
= true;
251 #if defined(TARGET_PPC64)
252 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info
*info
, Error
**errp
)
256 assert(kvm_state
!= NULL
);
258 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
259 error_setg(errp
, "KVM doesn't expose the MMU features it supports");
260 error_append_hint(errp
, "Consider switching to a newer KVM\n");
264 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
269 error_setg_errno(errp
, -ret
,
270 "KVM failed to provide the MMU features it supports");
273 struct ppc_radix_page_info
*kvm_get_radix_page_info(void)
275 KVMState
*s
= KVM_STATE(current_machine
->accelerator
);
276 struct ppc_radix_page_info
*radix_page_info
;
277 struct kvm_ppc_rmmu_info rmmu_info
;
280 if (!kvm_check_extension(s
, KVM_CAP_PPC_MMU_RADIX
)) {
283 if (kvm_vm_ioctl(s
, KVM_PPC_GET_RMMU_INFO
, &rmmu_info
)) {
286 radix_page_info
= g_malloc0(sizeof(*radix_page_info
));
287 radix_page_info
->count
= 0;
288 for (i
= 0; i
< PPC_PAGE_SIZES_MAX_SZ
; i
++) {
289 if (rmmu_info
.ap_encodings
[i
]) {
290 radix_page_info
->entries
[i
] = rmmu_info
.ap_encodings
[i
];
291 radix_page_info
->count
++;
294 return radix_page_info
;
297 target_ulong
kvmppc_configure_v3_mmu(PowerPCCPU
*cpu
,
298 bool radix
, bool gtse
,
301 CPUState
*cs
= CPU(cpu
);
304 struct kvm_ppc_mmuv3_cfg cfg
= {
305 .process_table
= proc_tbl
,
309 flags
|= KVM_PPC_MMUV3_RADIX
;
312 flags
|= KVM_PPC_MMUV3_GTSE
;
315 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_CONFIGURE_V3_MMU
, &cfg
);
322 return H_NOT_AVAILABLE
;
328 bool kvmppc_hpt_needs_host_contiguous_pages(void)
330 static struct kvm_ppc_smmu_info smmu_info
;
332 if (!kvm_enabled()) {
336 kvm_get_smmu_info(&smmu_info
, &error_fatal
);
337 return !!(smmu_info
.flags
& KVM_PPC_PAGE_SIZES_REAL
);
340 void kvm_check_mmu(PowerPCCPU
*cpu
, Error
**errp
)
342 struct kvm_ppc_smmu_info smmu_info
;
344 Error
*local_err
= NULL
;
346 /* For now, we only have anything to check on hash64 MMUs */
347 if (!cpu
->hash64_opts
|| !kvm_enabled()) {
351 kvm_get_smmu_info(&smmu_info
, &local_err
);
353 error_propagate(errp
, local_err
);
357 if (ppc_hash64_has(cpu
, PPC_HASH64_1TSEG
)
358 && !(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
360 "KVM does not support 1TiB segments which guest expects");
364 if (smmu_info
.slb_size
< cpu
->hash64_opts
->slb_size
) {
365 error_setg(errp
, "KVM only supports %u SLB entries, but guest needs %u",
366 smmu_info
.slb_size
, cpu
->hash64_opts
->slb_size
);
371 * Verify that every pagesize supported by the cpu model is
372 * supported by KVM with the same encodings
374 for (iq
= 0; iq
< ARRAY_SIZE(cpu
->hash64_opts
->sps
); iq
++) {
375 PPCHash64SegmentPageSizes
*qsps
= &cpu
->hash64_opts
->sps
[iq
];
376 struct kvm_ppc_one_seg_page_size
*ksps
;
378 for (ik
= 0; ik
< ARRAY_SIZE(smmu_info
.sps
); ik
++) {
379 if (qsps
->page_shift
== smmu_info
.sps
[ik
].page_shift
) {
383 if (ik
>= ARRAY_SIZE(smmu_info
.sps
)) {
384 error_setg(errp
, "KVM doesn't support for base page shift %u",
389 ksps
= &smmu_info
.sps
[ik
];
390 if (ksps
->slb_enc
!= qsps
->slb_enc
) {
392 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
393 ksps
->slb_enc
, ksps
->page_shift
, qsps
->slb_enc
);
397 for (jq
= 0; jq
< ARRAY_SIZE(qsps
->enc
); jq
++) {
398 for (jk
= 0; jk
< ARRAY_SIZE(ksps
->enc
); jk
++) {
399 if (qsps
->enc
[jq
].page_shift
== ksps
->enc
[jk
].page_shift
) {
404 if (jk
>= ARRAY_SIZE(ksps
->enc
)) {
405 error_setg(errp
, "KVM doesn't support page shift %u/%u",
406 qsps
->enc
[jq
].page_shift
, qsps
->page_shift
);
409 if (qsps
->enc
[jq
].pte_enc
!= ksps
->enc
[jk
].pte_enc
) {
411 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
412 ksps
->enc
[jk
].pte_enc
, qsps
->enc
[jq
].page_shift
,
413 qsps
->page_shift
, qsps
->enc
[jq
].pte_enc
);
419 if (ppc_hash64_has(cpu
, PPC_HASH64_CI_LARGEPAGE
)) {
420 /* Mostly what guest pagesizes we can use are related to the
421 * host pages used to map guest RAM, which is handled in the
422 * platform code. Cache-Inhibited largepages (64k) however are
423 * used for I/O, so if they're mapped to the host at all it
424 * will be a normal mapping, not a special hugepage one used
426 if (getpagesize() < 0x10000) {
428 "KVM can't supply 64kiB CI pages, which guest expects");
432 #endif /* defined (TARGET_PPC64) */
434 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
436 return POWERPC_CPU(cpu
)->vcpu_id
;
439 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
440 * book3s supports only 1 watchpoint, so an array size
441 * of 4 is sufficient for now.
443 #define MAX_HW_BKPTS 4
445 static struct HWBreakpoint
{
448 } hw_debug_points
[MAX_HW_BKPTS
];
450 static CPUWatchpoint hw_watchpoint
;
452 /* Default there is no breakpoint and watchpoint supported */
453 static int max_hw_breakpoint
;
454 static int max_hw_watchpoint
;
455 static int nb_hw_breakpoint
;
456 static int nb_hw_watchpoint
;
458 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
460 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
461 max_hw_breakpoint
= 2;
462 max_hw_watchpoint
= 2;
465 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
466 fprintf(stderr
, "Error initializing h/w breakpoints\n");
471 int kvm_arch_init_vcpu(CPUState
*cs
)
473 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
474 CPUPPCState
*cenv
= &cpu
->env
;
477 /* Synchronize sregs with kvm */
478 ret
= kvm_arch_sync_sregs(cpu
);
480 if (ret
== -EINVAL
) {
481 error_report("Register sync failed... If you're using kvm-hv.ko,"
482 " only \"-cpu host\" is possible");
487 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
489 switch (cenv
->mmu_model
) {
490 case POWERPC_MMU_BOOKE206
:
491 /* This target supports access to KVM's guest TLB */
492 ret
= kvm_booke206_tlb_init(cpu
);
494 case POWERPC_MMU_2_07
:
495 if (!cap_htm
&& !kvmppc_is_pr(cs
->kvm_state
)) {
496 /* KVM-HV has transactional memory on POWER8 also without the
497 * KVM_CAP_PPC_HTM extension, so enable it here instead as
498 * long as it's available to userspace on the host. */
499 if (qemu_getauxval(AT_HWCAP2
) & PPC_FEATURE2_HAS_HTM
) {
508 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
509 kvmppc_hw_debug_points_init(cenv
);
514 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
516 CPUPPCState
*env
= &cpu
->env
;
517 CPUState
*cs
= CPU(cpu
);
518 struct kvm_dirty_tlb dirty_tlb
;
519 unsigned char *bitmap
;
522 if (!env
->kvm_sw_tlb
) {
526 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
527 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
529 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
530 dirty_tlb
.num_dirty
= env
->nb_tlb
;
532 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
534 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
535 __func__
, strerror(-ret
));
541 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
543 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
544 CPUPPCState
*env
= &cpu
->env
;
549 struct kvm_one_reg reg
= {
551 .addr
= (uintptr_t) &val
,
555 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
557 trace_kvm_failed_spr_get(spr
, strerror(errno
));
559 switch (id
& KVM_REG_SIZE_MASK
) {
560 case KVM_REG_SIZE_U32
:
561 env
->spr
[spr
] = val
.u32
;
564 case KVM_REG_SIZE_U64
:
565 env
->spr
[spr
] = val
.u64
;
569 /* Don't handle this size yet */
575 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
577 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
578 CPUPPCState
*env
= &cpu
->env
;
583 struct kvm_one_reg reg
= {
585 .addr
= (uintptr_t) &val
,
589 switch (id
& KVM_REG_SIZE_MASK
) {
590 case KVM_REG_SIZE_U32
:
591 val
.u32
= env
->spr
[spr
];
594 case KVM_REG_SIZE_U64
:
595 val
.u64
= env
->spr
[spr
];
599 /* Don't handle this size yet */
603 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
605 trace_kvm_failed_spr_set(spr
, strerror(errno
));
609 static int kvm_put_fp(CPUState
*cs
)
611 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
612 CPUPPCState
*env
= &cpu
->env
;
613 struct kvm_one_reg reg
;
617 if (env
->insns_flags
& PPC_FLOAT
) {
618 uint64_t fpscr
= env
->fpscr
;
619 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
621 reg
.id
= KVM_REG_PPC_FPSCR
;
622 reg
.addr
= (uintptr_t)&fpscr
;
623 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
625 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
629 for (i
= 0; i
< 32; i
++) {
631 uint64_t *fpr
= cpu_fpr_ptr(&cpu
->env
, i
);
632 uint64_t *vsrl
= cpu_vsrl_ptr(&cpu
->env
, i
);
634 #ifdef HOST_WORDS_BIGENDIAN
635 vsr
[0] = float64_val(*fpr
);
639 vsr
[1] = float64_val(*fpr
);
641 reg
.addr
= (uintptr_t) &vsr
;
642 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
644 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
646 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
653 if (env
->insns_flags
& PPC_ALTIVEC
) {
654 reg
.id
= KVM_REG_PPC_VSCR
;
655 reg
.addr
= (uintptr_t)&env
->vscr
;
656 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
658 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
662 for (i
= 0; i
< 32; i
++) {
663 reg
.id
= KVM_REG_PPC_VR(i
);
664 reg
.addr
= (uintptr_t)cpu_avr_ptr(env
, i
);
665 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
667 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
676 static int kvm_get_fp(CPUState
*cs
)
678 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
679 CPUPPCState
*env
= &cpu
->env
;
680 struct kvm_one_reg reg
;
684 if (env
->insns_flags
& PPC_FLOAT
) {
686 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
688 reg
.id
= KVM_REG_PPC_FPSCR
;
689 reg
.addr
= (uintptr_t)&fpscr
;
690 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
692 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
698 for (i
= 0; i
< 32; i
++) {
700 uint64_t *fpr
= cpu_fpr_ptr(&cpu
->env
, i
);
701 uint64_t *vsrl
= cpu_vsrl_ptr(&cpu
->env
, i
);
703 reg
.addr
= (uintptr_t) &vsr
;
704 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
706 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
708 DPRINTF("Unable to get %s%d from KVM: %s\n",
709 vsx
? "VSR" : "FPR", i
, strerror(errno
));
712 #ifdef HOST_WORDS_BIGENDIAN
727 if (env
->insns_flags
& PPC_ALTIVEC
) {
728 reg
.id
= KVM_REG_PPC_VSCR
;
729 reg
.addr
= (uintptr_t)&env
->vscr
;
730 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
732 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
736 for (i
= 0; i
< 32; i
++) {
737 reg
.id
= KVM_REG_PPC_VR(i
);
738 reg
.addr
= (uintptr_t)cpu_avr_ptr(env
, i
);
739 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
741 DPRINTF("Unable to get VR%d from KVM: %s\n",
751 #if defined(TARGET_PPC64)
752 static int kvm_get_vpa(CPUState
*cs
)
754 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
755 sPAPRCPUState
*spapr_cpu
= spapr_cpu_state(cpu
);
756 struct kvm_one_reg reg
;
759 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
760 reg
.addr
= (uintptr_t)&spapr_cpu
->vpa_addr
;
761 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
763 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
767 assert((uintptr_t)&spapr_cpu
->slb_shadow_size
768 == ((uintptr_t)&spapr_cpu
->slb_shadow_addr
+ 8));
769 reg
.id
= KVM_REG_PPC_VPA_SLB
;
770 reg
.addr
= (uintptr_t)&spapr_cpu
->slb_shadow_addr
;
771 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
773 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
778 assert((uintptr_t)&spapr_cpu
->dtl_size
779 == ((uintptr_t)&spapr_cpu
->dtl_addr
+ 8));
780 reg
.id
= KVM_REG_PPC_VPA_DTL
;
781 reg
.addr
= (uintptr_t)&spapr_cpu
->dtl_addr
;
782 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
784 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
792 static int kvm_put_vpa(CPUState
*cs
)
794 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
795 sPAPRCPUState
*spapr_cpu
= spapr_cpu_state(cpu
);
796 struct kvm_one_reg reg
;
799 /* SLB shadow or DTL can't be registered unless a master VPA is
800 * registered. That means when restoring state, if a VPA *is*
801 * registered, we need to set that up first. If not, we need to
802 * deregister the others before deregistering the master VPA */
803 assert(spapr_cpu
->vpa_addr
804 || !(spapr_cpu
->slb_shadow_addr
|| spapr_cpu
->dtl_addr
));
806 if (spapr_cpu
->vpa_addr
) {
807 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
808 reg
.addr
= (uintptr_t)&spapr_cpu
->vpa_addr
;
809 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
811 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
816 assert((uintptr_t)&spapr_cpu
->slb_shadow_size
817 == ((uintptr_t)&spapr_cpu
->slb_shadow_addr
+ 8));
818 reg
.id
= KVM_REG_PPC_VPA_SLB
;
819 reg
.addr
= (uintptr_t)&spapr_cpu
->slb_shadow_addr
;
820 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
822 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
826 assert((uintptr_t)&spapr_cpu
->dtl_size
827 == ((uintptr_t)&spapr_cpu
->dtl_addr
+ 8));
828 reg
.id
= KVM_REG_PPC_VPA_DTL
;
829 reg
.addr
= (uintptr_t)&spapr_cpu
->dtl_addr
;
830 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
832 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
837 if (!spapr_cpu
->vpa_addr
) {
838 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
839 reg
.addr
= (uintptr_t)&spapr_cpu
->vpa_addr
;
840 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
842 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
849 #endif /* TARGET_PPC64 */
851 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
853 CPUPPCState
*env
= &cpu
->env
;
854 struct kvm_sregs sregs
;
857 sregs
.pvr
= env
->spr
[SPR_PVR
];
860 PPCVirtualHypervisorClass
*vhc
=
861 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu
->vhyp
);
862 sregs
.u
.s
.sdr1
= vhc
->encode_hpt_for_kvm_pr(cpu
->vhyp
);
864 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
869 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
870 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
871 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
872 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
874 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
879 for (i
= 0; i
< 16; i
++) {
880 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
884 for (i
= 0; i
< 8; i
++) {
885 /* Beware. We have to swap upper and lower bits here */
886 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
888 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
892 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
895 int kvm_arch_put_registers(CPUState
*cs
, int level
)
897 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
898 CPUPPCState
*env
= &cpu
->env
;
899 struct kvm_regs regs
;
903 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
910 regs
.xer
= cpu_read_xer(env
);
914 regs
.srr0
= env
->spr
[SPR_SRR0
];
915 regs
.srr1
= env
->spr
[SPR_SRR1
];
917 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
918 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
919 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
920 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
921 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
922 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
923 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
924 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
926 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
928 for (i
= 0;i
< 32; i
++)
929 regs
.gpr
[i
] = env
->gpr
[i
];
932 for (i
= 0; i
< 8; i
++) {
933 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
936 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
942 if (env
->tlb_dirty
) {
944 env
->tlb_dirty
= false;
947 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
948 ret
= kvmppc_put_books_sregs(cpu
);
954 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
955 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
961 /* We deliberately ignore errors here, for kernels which have
962 * the ONE_REG calls, but don't support the specific
963 * registers, there's a reasonable chance things will still
964 * work, at least until we try to migrate. */
965 for (i
= 0; i
< 1024; i
++) {
966 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
969 kvm_put_one_spr(cs
, id
, i
);
975 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
976 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
978 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
979 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
981 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
982 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
983 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
984 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
985 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
986 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
987 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
988 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
989 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
990 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
994 if (kvm_put_vpa(cs
) < 0) {
995 DPRINTF("Warning: Unable to set VPA information to KVM\n");
999 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1000 #endif /* TARGET_PPC64 */
1006 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1008 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1011 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1013 CPUPPCState
*env
= &cpu
->env
;
1014 struct kvm_sregs sregs
;
1017 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1022 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1023 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1024 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1025 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1026 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1027 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1028 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1029 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1030 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1031 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1032 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1033 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1036 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1037 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1038 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1039 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1040 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1041 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1044 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1045 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1048 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1049 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1052 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1053 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1054 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1055 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1056 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1057 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1058 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1059 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1060 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1061 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1062 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1063 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1064 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1065 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1066 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1067 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1068 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1069 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1070 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1071 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1072 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1073 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1074 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1075 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1076 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1077 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1078 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1079 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1080 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1081 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1082 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1083 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1084 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1086 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1087 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1088 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1089 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1090 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1091 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1092 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1095 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1096 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1097 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1100 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1101 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1102 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1103 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1104 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1108 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1109 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1110 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1111 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1112 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1113 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1114 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1115 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1116 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1117 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1118 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1121 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1122 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1125 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1126 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1127 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1130 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1131 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1132 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1133 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1135 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1136 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1137 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1144 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1146 CPUPPCState
*env
= &cpu
->env
;
1147 struct kvm_sregs sregs
;
1151 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1157 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1163 * The packed SLB array we get from KVM_GET_SREGS only contains
1164 * information about valid entries. So we flush our internal copy
1165 * to get rid of stale ones, then put all valid SLB entries back
1168 memset(env
->slb
, 0, sizeof(env
->slb
));
1169 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1170 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1171 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1173 * Only restore valid entries
1175 if (rb
& SLB_ESID_V
) {
1176 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1182 for (i
= 0; i
< 16; i
++) {
1183 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1187 for (i
= 0; i
< 8; i
++) {
1188 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1189 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1190 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1191 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1197 int kvm_arch_get_registers(CPUState
*cs
)
1199 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1200 CPUPPCState
*env
= &cpu
->env
;
1201 struct kvm_regs regs
;
1205 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1210 for (i
= 7; i
>= 0; i
--) {
1211 env
->crf
[i
] = cr
& 15;
1215 env
->ctr
= regs
.ctr
;
1217 cpu_write_xer(env
, regs
.xer
);
1218 env
->msr
= regs
.msr
;
1221 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1222 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1224 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1225 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1226 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1227 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1228 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1229 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1230 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1231 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1233 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1235 for (i
= 0;i
< 32; i
++)
1236 env
->gpr
[i
] = regs
.gpr
[i
];
1240 if (cap_booke_sregs
) {
1241 ret
= kvmppc_get_booke_sregs(cpu
);
1248 ret
= kvmppc_get_books_sregs(cpu
);
1255 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1261 /* We deliberately ignore errors here, for kernels which have
1262 * the ONE_REG calls, but don't support the specific
1263 * registers, there's a reasonable chance things will still
1264 * work, at least until we try to migrate. */
1265 for (i
= 0; i
< 1024; i
++) {
1266 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1269 kvm_get_one_spr(cs
, id
, i
);
1275 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1276 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1278 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1279 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1281 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1282 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1283 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1284 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1285 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1286 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1287 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1288 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1289 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1290 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1294 if (kvm_get_vpa(cs
) < 0) {
1295 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1299 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1306 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1308 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1310 if (irq
!= PPC_INTERRUPT_EXT
) {
1314 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1318 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1323 #if defined(TARGET_PPC64)
1324 #define PPC_INPUT_INT PPC970_INPUT_INT
1326 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1329 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1331 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1332 CPUPPCState
*env
= &cpu
->env
;
1336 qemu_mutex_lock_iothread();
1338 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1339 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1340 if (!cap_interrupt_level
&&
1341 run
->ready_for_interrupt_injection
&&
1342 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1343 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1345 /* For now KVM disregards the 'irq' argument. However, in the
1346 * future KVM could cache it in-kernel to avoid a heavyweight exit
1347 * when reading the UIC.
1349 irq
= KVM_INTERRUPT_SET
;
1351 DPRINTF("injected interrupt %d\n", irq
);
1352 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1354 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1357 /* Always wake up soon in case the interrupt was level based */
1358 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1359 (NANOSECONDS_PER_SECOND
/ 50));
1362 /* We don't know if there are more interrupts pending after this. However,
1363 * the guest will return to userspace in the course of handling this one
1364 * anyways, so we will get a chance to deliver the rest. */
1366 qemu_mutex_unlock_iothread();
1369 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1371 return MEMTXATTRS_UNSPECIFIED
;
1374 int kvm_arch_process_async_events(CPUState
*cs
)
1379 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1381 CPUState
*cs
= CPU(cpu
);
1382 CPUPPCState
*env
= &cpu
->env
;
1384 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1386 cs
->exception_index
= EXCP_HLT
;
1392 /* map dcr access to existing qemu dcr emulation */
1393 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1395 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1396 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1401 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1403 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1404 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1409 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1411 /* Mixed endian case is not handled */
1412 uint32_t sc
= debug_inst_opcode
;
1414 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1416 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1423 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1427 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1428 sc
!= debug_inst_opcode
||
1429 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1437 static int find_hw_breakpoint(target_ulong addr
, int type
)
1441 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1442 <= ARRAY_SIZE(hw_debug_points
));
1444 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1445 if (hw_debug_points
[n
].addr
== addr
&&
1446 hw_debug_points
[n
].type
== type
) {
1454 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1458 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1460 *flag
= BP_MEM_ACCESS
;
1464 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1466 *flag
= BP_MEM_WRITE
;
1470 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1472 *flag
= BP_MEM_READ
;
1479 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1480 target_ulong len
, int type
)
1482 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1486 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1487 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1490 case GDB_BREAKPOINT_HW
:
1491 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1495 if (find_hw_breakpoint(addr
, type
) >= 0) {
1502 case GDB_WATCHPOINT_WRITE
:
1503 case GDB_WATCHPOINT_READ
:
1504 case GDB_WATCHPOINT_ACCESS
:
1505 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1509 if (find_hw_breakpoint(addr
, type
) >= 0) {
1523 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1524 target_ulong len
, int type
)
1528 n
= find_hw_breakpoint(addr
, type
);
1534 case GDB_BREAKPOINT_HW
:
1538 case GDB_WATCHPOINT_WRITE
:
1539 case GDB_WATCHPOINT_READ
:
1540 case GDB_WATCHPOINT_ACCESS
:
1547 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1552 void kvm_arch_remove_all_hw_breakpoints(void)
1554 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1557 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1561 /* Software Breakpoint updates */
1562 if (kvm_sw_breakpoints_active(cs
)) {
1563 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1566 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1567 <= ARRAY_SIZE(hw_debug_points
));
1568 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1570 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1571 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1572 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1573 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1574 switch (hw_debug_points
[n
].type
) {
1575 case GDB_BREAKPOINT_HW
:
1576 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1578 case GDB_WATCHPOINT_WRITE
:
1579 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1581 case GDB_WATCHPOINT_READ
:
1582 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1584 case GDB_WATCHPOINT_ACCESS
:
1585 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1586 KVMPPC_DEBUG_WATCH_READ
;
1589 cpu_abort(cs
, "Unsupported breakpoint type\n");
1591 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1596 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1598 CPUState
*cs
= CPU(cpu
);
1599 CPUPPCState
*env
= &cpu
->env
;
1600 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1605 if (cs
->singlestep_enabled
) {
1607 } else if (arch_info
->status
) {
1608 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1609 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1610 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1614 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1615 KVMPPC_DEBUG_WATCH_WRITE
)) {
1616 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1619 cs
->watchpoint_hit
= &hw_watchpoint
;
1620 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1621 hw_watchpoint
.flags
= flag
;
1625 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1628 /* QEMU is not able to handle debug exception, so inject
1629 * program exception to guest;
1630 * Yes program exception NOT debug exception !!
1631 * When QEMU is using debug resources then debug exception must
1632 * be always set. To achieve this we set MSR_DE and also set
1633 * MSRP_DEP so guest cannot change MSR_DE.
1634 * When emulating debug resource for guest we want guest
1635 * to control MSR_DE (enable/disable debug interrupt on need).
1636 * Supporting both configurations are NOT possible.
1637 * So the result is that we cannot share debug resources
1638 * between QEMU and Guest on BOOKE architecture.
1639 * In the current design QEMU gets the priority over guest,
1640 * this means that if QEMU is using debug resources then guest
1642 * For software breakpoint QEMU uses a privileged instruction;
1643 * So there cannot be any reason that we are here for guest
1644 * set debug exception, only possibility is guest executed a
1645 * privileged / illegal instruction and that's why we are
1646 * injecting a program interrupt.
1649 cpu_synchronize_state(cs
);
1650 /* env->nip is PC, so increment this by 4 to use
1651 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1654 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1655 env
->error_code
= POWERPC_EXCP_INVAL
;
1656 ppc_cpu_do_interrupt(cs
);
1662 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1664 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1665 CPUPPCState
*env
= &cpu
->env
;
1668 qemu_mutex_lock_iothread();
1670 switch (run
->exit_reason
) {
1672 if (run
->dcr
.is_write
) {
1673 DPRINTF("handle dcr write\n");
1674 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1676 DPRINTF("handle dcr read\n");
1677 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1681 DPRINTF("handle halt\n");
1682 ret
= kvmppc_handle_halt(cpu
);
1684 #if defined(TARGET_PPC64)
1685 case KVM_EXIT_PAPR_HCALL
:
1686 DPRINTF("handle PAPR hypercall\n");
1687 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1689 run
->papr_hcall
.args
);
1694 DPRINTF("handle epr\n");
1695 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1698 case KVM_EXIT_WATCHDOG
:
1699 DPRINTF("handle watchdog expiry\n");
1700 watchdog_perform_action();
1704 case KVM_EXIT_DEBUG
:
1705 DPRINTF("handle debug exception\n");
1706 if (kvm_handle_debug(cpu
, run
)) {
1710 /* re-enter, this exception was guest-internal */
1715 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1720 qemu_mutex_unlock_iothread();
1724 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1726 CPUState
*cs
= CPU(cpu
);
1727 uint32_t bits
= tsr_bits
;
1728 struct kvm_one_reg reg
= {
1729 .id
= KVM_REG_PPC_OR_TSR
,
1730 .addr
= (uintptr_t) &bits
,
1733 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1736 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1739 CPUState
*cs
= CPU(cpu
);
1740 uint32_t bits
= tsr_bits
;
1741 struct kvm_one_reg reg
= {
1742 .id
= KVM_REG_PPC_CLEAR_TSR
,
1743 .addr
= (uintptr_t) &bits
,
1746 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1749 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1751 CPUState
*cs
= CPU(cpu
);
1752 CPUPPCState
*env
= &cpu
->env
;
1753 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1755 struct kvm_one_reg reg
= {
1756 .id
= KVM_REG_PPC_TCR
,
1757 .addr
= (uintptr_t) &tcr
,
1760 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1763 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1765 CPUState
*cs
= CPU(cpu
);
1768 if (!kvm_enabled()) {
1772 if (!cap_ppc_watchdog
) {
1773 printf("warning: KVM does not support watchdog");
1777 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1779 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1780 __func__
, strerror(-ret
));
1787 static int read_cpuinfo(const char *field
, char *value
, int len
)
1791 int field_len
= strlen(field
);
1794 f
= fopen("/proc/cpuinfo", "r");
1800 if (!fgets(line
, sizeof(line
), f
)) {
1803 if (!strncmp(line
, field
, field_len
)) {
1804 pstrcpy(value
, len
, line
);
1815 uint32_t kvmppc_get_tbfreq(void)
1819 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1821 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1825 if (!(ns
= strchr(line
, ':'))) {
1834 bool kvmppc_get_host_serial(char **value
)
1836 return g_file_get_contents("/proc/device-tree/system-id", value
, NULL
,
/*
 * Read the contents of /proc/device-tree/model into *value.
 *
 * Returns whatever g_file_get_contents() returns; neither the length
 * nor a GError is requested.
 */
bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
1845 /* Try to find a device tree node for a CPU with clock-frequency property */
1846 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1848 struct dirent
*dirp
;
1851 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1852 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1857 while ((dirp
= readdir(dp
)) != NULL
) {
1859 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1861 f
= fopen(buf
, "r");
1863 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1870 if (buf
[0] == '\0') {
1871 printf("Unknown host!\n");
1878 static uint64_t kvmppc_read_int_dt(const char *filename
)
1887 f
= fopen(filename
, "rb");
1892 len
= fread(&u
, 1, sizeof(u
), f
);
1896 /* property is a 32-bit quantity */
1897 return be32_to_cpu(u
.v32
);
1899 return be64_to_cpu(u
.v64
);
1905 /* Read a CPU node property from the host device tree that's a single
1906 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1907 * (can't find or open the property, or doesn't understand the
1909 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1911 char buf
[PATH_MAX
], *tmp
;
1914 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1918 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1919 val
= kvmppc_read_int_dt(tmp
);
/*
 * Return the "clock-frequency" CPU node property as read by
 * kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
1930 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1932 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1933 CPUState
*cs
= CPU(cpu
);
1935 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1936 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1943 int kvmppc_get_hasidle(CPUPPCState
*env
)
1945 struct kvm_ppc_pvinfo pvinfo
;
1947 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1948 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1955 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1957 uint32_t *hc
= (uint32_t*)buf
;
1958 struct kvm_ppc_pvinfo pvinfo
;
1960 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1961 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1966 * Fallback to always fail hypercalls regardless of endianness:
1968 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1970 * b .+8 (becomes nop in wrong endian)
1971 * bswap32(li r3, -1)
1974 hc
[0] = cpu_to_be32(0x08000048);
1975 hc
[1] = cpu_to_be32(0x3860ffff);
1976 hc
[2] = cpu_to_be32(0x48000008);
1977 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
1982 static inline int kvmppc_enable_hcall(KVMState
*s
, target_ulong hcall
)
1984 return kvm_vm_enable_cap(s
, KVM_CAP_PPC_ENABLE_HCALL
, 0, hcall
, 1);
1987 void kvmppc_enable_logical_ci_hcalls(void)
1990 * FIXME: it would be nice if we could detect the cases where
1991 * we're using a device which requires the in kernel
1992 * implementation of these hcalls, but the kernel lacks them and
1993 * produce a warning.
1995 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
1996 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
1999 void kvmppc_enable_set_mode_hcall(void)
2001 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
2004 void kvmppc_enable_clear_ref_mod_hcalls(void)
2006 kvmppc_enable_hcall(kvm_state
, H_CLEAR_REF
);
2007 kvmppc_enable_hcall(kvm_state
, H_CLEAR_MOD
);
2010 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2012 CPUState
*cs
= CPU(cpu
);
2015 if (!kvm_enabled()) {
2019 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2021 error_report("This vCPU type or KVM version does not support PAPR");
2025 /* Update the capability flag so we sync the right information
2030 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t compat_pvr
)
2032 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &compat_pvr
);
2035 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2037 CPUState
*cs
= CPU(cpu
);
2040 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2041 if (ret
&& mpic_proxy
) {
2042 error_report("This KVM version does not support EPR");
2047 int kvmppc_smt_threads(void)
2049 return cap_ppc_smt
? cap_ppc_smt
: 1;
2052 int kvmppc_set_smt_threads(int smt
)
2056 ret
= kvm_vm_enable_cap(kvm_state
, KVM_CAP_PPC_SMT
, 0, smt
, 0);
2063 void kvmppc_hint_smt_possible(Error
**errp
)
2069 assert(kvm_enabled());
2070 if (cap_ppc_smt_possible
) {
2071 g
= g_string_new("Available VSMT modes:");
2072 for (i
= 63; i
>= 0; i
--) {
2073 if ((1UL << i
) & cap_ppc_smt_possible
) {
2074 g_string_append_printf(g
, " %lu", (1UL << i
));
2077 s
= g_string_free(g
, false);
2078 error_append_hint(errp
, "%s.\n", s
);
2081 error_append_hint(errp
,
2082 "This KVM seems to be too old to support VSMT.\n");
2088 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2090 struct kvm_ppc_smmu_info info
;
2091 long rampagesize
, best_page_shift
;
2094 /* Find the largest hardware supported page size that's less than
2095 * or equal to the (logical) backing page size of guest RAM */
2096 kvm_get_smmu_info(&info
, &error_fatal
);
2097 rampagesize
= qemu_getrampagesize();
2098 best_page_shift
= 0;
2100 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2101 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2103 if (!sps
->page_shift
) {
2107 if ((sps
->page_shift
> best_page_shift
)
2108 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2109 best_page_shift
= sps
->page_shift
;
2113 return MIN(current_size
,
2114 1ULL << (best_page_shift
+ hash_shift
- 7));
2118 bool kvmppc_spapr_use_multitce(void)
2120 return cap_spapr_multitce
;
2123 int kvmppc_spapr_enable_inkernel_multitce(void)
2127 ret
= kvm_vm_enable_cap(kvm_state
, KVM_CAP_PPC_ENABLE_HCALL
, 0,
2128 H_PUT_TCE_INDIRECT
, 1);
2130 ret
= kvm_vm_enable_cap(kvm_state
, KVM_CAP_PPC_ENABLE_HCALL
, 0,
2137 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t page_shift
,
2138 uint64_t bus_offset
, uint32_t nb_table
,
2139 int *pfd
, bool need_vfio
)
2145 /* Must set fd to -1 so we don't try to munmap when called for
2146 * destroying the table, which the upper layers -will- do
2149 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2153 if (cap_spapr_tce_64
) {
2154 struct kvm_create_spapr_tce_64 args
= {
2156 .page_shift
= page_shift
,
2157 .offset
= bus_offset
>> page_shift
,
2161 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE_64
, &args
);
2164 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2168 } else if (cap_spapr_tce
) {
2169 uint64_t window_size
= (uint64_t) nb_table
<< page_shift
;
2170 struct kvm_create_spapr_tce args
= {
2172 .window_size
= window_size
,
2174 if ((window_size
!= args
.window_size
) || bus_offset
) {
2177 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2179 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2187 len
= nb_table
* sizeof(uint64_t);
2188 /* FIXME: round this up to page size */
2190 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2191 if (table
== MAP_FAILED
) {
2192 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2202 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2210 len
= nb_table
* sizeof(uint64_t);
2211 if ((munmap(table
, len
) < 0) ||
2213 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2215 /* Leak the table */
2221 int kvmppc_reset_htab(int shift_hint
)
2223 uint32_t shift
= shift_hint
;
2225 if (!kvm_enabled()) {
2226 /* Full emulation, tell caller to allocate htab itself */
2229 if (kvm_vm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2231 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2232 if (ret
== -ENOTTY
) {
2233 /* At least some versions of PR KVM advertise the
2234 * capability, but don't implement the ioctl(). Oops.
2235 * Return 0 so that we allocate the htab in qemu, as is
2236 * correct for PR. */
2238 } else if (ret
< 0) {
2244 /* We have a kernel that predates the htab reset calls. For PR
2245 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2246 * this era, it has allocated a 16MB fixed size hash table already. */
2247 if (kvmppc_is_pr(kvm_state
)) {
2248 /* PR - tell caller to allocate htab */
2251 /* HV - assume 16MB kernel allocated htab */
2256 static inline uint32_t mfpvr(void)
2265 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2274 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2276 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2277 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2278 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2280 /* Now fix up the class with information we can query from the host */
2283 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
,
2284 qemu_getauxval(AT_HWCAP
) & PPC_FEATURE_HAS_ALTIVEC
);
2285 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
,
2286 qemu_getauxval(AT_HWCAP
) & PPC_FEATURE_HAS_VSX
);
2287 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
,
2288 qemu_getauxval(AT_HWCAP
) & PPC_FEATURE_HAS_DFP
);
2290 if (dcache_size
!= -1) {
2291 pcc
->l1_dcache_size
= dcache_size
;
2294 if (icache_size
!= -1) {
2295 pcc
->l1_icache_size
= icache_size
;
2298 #if defined(TARGET_PPC64)
2299 pcc
->radix_page_info
= kvm_get_radix_page_info();
2301 if ((pcc
->pvr
& 0xffffff00) == CPU_POWERPC_POWER9_DD1
) {
2303 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2304 * compliant. More importantly, advertising ISA 3.00
2305 * architected mode may prevent guests from activating
2306 * necessary DD1 workarounds.
2308 pcc
->pcr_supported
&= ~(PCR_COMPAT_3_00
| PCR_COMPAT_2_07
2309 | PCR_COMPAT_2_06
| PCR_COMPAT_2_05
);
2311 #endif /* defined(TARGET_PPC64) */
2314 bool kvmppc_has_cap_epr(void)
2319 bool kvmppc_has_cap_fixup_hcalls(void)
2321 return cap_fixup_hcalls
;
2324 bool kvmppc_has_cap_htm(void)
2329 bool kvmppc_has_cap_mmu_radix(void)
2331 return cap_mmu_radix
;
2334 bool kvmppc_has_cap_mmu_hash_v3(void)
2336 return cap_mmu_hash_v3
;
2339 static bool kvmppc_power8_host(void)
2344 uint32_t base_pvr
= CPU_POWERPC_POWER_SERVER_MASK
& mfpvr();
2345 ret
= (base_pvr
== CPU_POWERPC_POWER8E_BASE
) ||
2346 (base_pvr
== CPU_POWERPC_POWER8NVL_BASE
) ||
2347 (base_pvr
== CPU_POWERPC_POWER8_BASE
);
2349 #endif /* TARGET_PPC64 */
2353 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c
)
2355 bool l1d_thread_priv_req
= !kvmppc_power8_host();
2357 if (~c
.behaviour
& c
.behaviour_mask
& H_CPU_BEHAV_L1D_FLUSH_PR
) {
2359 } else if ((!l1d_thread_priv_req
||
2360 c
.character
& c
.character_mask
& H_CPU_CHAR_L1D_THREAD_PRIV
) &&
2361 (c
.character
& c
.character_mask
2362 & (H_CPU_CHAR_L1D_FLUSH_ORI30
| H_CPU_CHAR_L1D_FLUSH_TRIG2
))) {
2369 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c
)
2371 if (~c
.behaviour
& c
.behaviour_mask
& H_CPU_BEHAV_BNDS_CHK_SPEC_BAR
) {
2373 } else if (c
.character
& c
.character_mask
& H_CPU_CHAR_SPEC_BAR_ORI31
) {
2380 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c
)
2382 if (c
.character
& c
.character_mask
& H_CPU_CHAR_CACHE_COUNT_DIS
) {
2383 return SPAPR_CAP_FIXED_CCD
;
2384 } else if (c
.character
& c
.character_mask
& H_CPU_CHAR_BCCTRL_SERIALISED
) {
2385 return SPAPR_CAP_FIXED_IBS
;
2391 static void kvmppc_get_cpu_characteristics(KVMState
*s
)
2393 struct kvm_ppc_cpu_char c
;
2397 cap_ppc_safe_cache
= 0;
2398 cap_ppc_safe_bounds_check
= 0;
2399 cap_ppc_safe_indirect_branch
= 0;
2401 ret
= kvm_vm_check_extension(s
, KVM_CAP_PPC_GET_CPU_CHAR
);
2405 ret
= kvm_vm_ioctl(s
, KVM_PPC_GET_CPU_CHAR
, &c
);
2410 cap_ppc_safe_cache
= parse_cap_ppc_safe_cache(c
);
2411 cap_ppc_safe_bounds_check
= parse_cap_ppc_safe_bounds_check(c
);
2412 cap_ppc_safe_indirect_branch
= parse_cap_ppc_safe_indirect_branch(c
);
2415 int kvmppc_get_cap_safe_cache(void)
2417 return cap_ppc_safe_cache
;
2420 int kvmppc_get_cap_safe_bounds_check(void)
2422 return cap_ppc_safe_bounds_check
;
2425 int kvmppc_get_cap_safe_indirect_branch(void)
2427 return cap_ppc_safe_indirect_branch
;
2430 bool kvmppc_has_cap_nested_kvm_hv(void)
2432 return !!cap_ppc_nested_kvm_hv
;
2435 int kvmppc_set_cap_nested_kvm_hv(int enable
)
2437 return kvm_vm_enable_cap(kvm_state
, KVM_CAP_PPC_NESTED_HV
, 0, enable
);
2440 bool kvmppc_has_cap_spapr_vfio(void)
2442 return cap_spapr_vfio
;
2445 PowerPCCPUClass
*kvm_ppc_get_host_cpu_class(void)
2447 uint32_t host_pvr
= mfpvr();
2448 PowerPCCPUClass
*pvr_pcc
;
2450 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2451 if (pvr_pcc
== NULL
) {
2452 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2458 static int kvm_ppc_register_host_cpu_type(MachineState
*ms
)
2460 TypeInfo type_info
= {
2461 .name
= TYPE_HOST_POWERPC_CPU
,
2462 .class_init
= kvmppc_host_cpu_class_init
,
2464 MachineClass
*mc
= MACHINE_GET_CLASS(ms
);
2465 PowerPCCPUClass
*pvr_pcc
;
2470 pvr_pcc
= kvm_ppc_get_host_cpu_class();
2471 if (pvr_pcc
== NULL
) {
2474 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2475 type_register(&type_info
);
2476 if (object_dynamic_cast(OBJECT(ms
), TYPE_SPAPR_MACHINE
)) {
2477 /* override TCG default cpu type with 'host' cpu model */
2478 mc
->default_cpu_type
= TYPE_HOST_POWERPC_CPU
;
2481 oc
= object_class_by_name(type_info
.name
);
2485 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2486 * we want "POWER8" to be a "family" alias that points to the current
2487 * host CPU type, too)
2489 dc
= DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc
));
2490 for (i
= 0; ppc_cpu_aliases
[i
].alias
!= NULL
; i
++) {
2491 if (strcasecmp(ppc_cpu_aliases
[i
].alias
, dc
->desc
) == 0) {
2494 ppc_cpu_aliases
[i
].model
= g_strdup(object_class_get_name(oc
));
2495 suffix
= strstr(ppc_cpu_aliases
[i
].model
, POWERPC_CPU_TYPE_SUFFIX
);
2506 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2508 struct kvm_rtas_token_args args
= {
2512 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2516 strncpy(args
.name
, function
, sizeof(args
.name
));
2518 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2521 int kvmppc_get_htab_fd(bool write
, uint64_t index
, Error
**errp
)
2523 struct kvm_get_htab_fd s
= {
2524 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2525 .start_index
= index
,
2530 error_setg(errp
, "KVM version doesn't support %s the HPT",
2531 write
? "writing" : "reading");
2535 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2537 error_setg(errp
, "Unable to open fd for %s HPT %s KVM: %s",
2538 write
? "writing" : "reading", write
? "to" : "from",
2546 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2548 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2549 uint8_t buf
[bufsize
];
2553 rc
= read(fd
, buf
, bufsize
);
2555 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2559 uint8_t *buffer
= buf
;
2562 struct kvm_get_htab_header
*head
=
2563 (struct kvm_get_htab_header
*) buffer
;
2564 size_t chunksize
= sizeof(*head
) +
2565 HASH_PTE_SIZE_64
* head
->n_valid
;
2567 qemu_put_be32(f
, head
->index
);
2568 qemu_put_be16(f
, head
->n_valid
);
2569 qemu_put_be16(f
, head
->n_invalid
);
2570 qemu_put_buffer(f
, (void *)(head
+ 1),
2571 HASH_PTE_SIZE_64
* head
->n_valid
);
2573 buffer
+= chunksize
;
2579 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2581 return (rc
== 0) ? 1 : 0;
2584 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2585 uint16_t n_valid
, uint16_t n_invalid
)
2587 struct kvm_get_htab_header
*buf
;
2588 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2591 buf
= alloca(chunksize
);
2593 buf
->n_valid
= n_valid
;
2594 buf
->n_invalid
= n_invalid
;
2596 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2598 rc
= write(fd
, buf
, chunksize
);
2600 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2604 if (rc
!= chunksize
) {
2605 /* We should never get a short write on a single chunk */
2606 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2612 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2617 void kvm_arch_init_irq_routing(KVMState
*s
)
2621 void kvmppc_read_hptes(ppc_hash_pte64_t
*hptes
, hwaddr ptex
, int n
)
2626 fd
= kvmppc_get_htab_fd(false, ptex
, &error_abort
);
2630 struct kvm_get_htab_header
*hdr
;
2631 int m
= n
< HPTES_PER_GROUP
? n
: HPTES_PER_GROUP
;
2632 char buf
[sizeof(*hdr
) + m
* HASH_PTE_SIZE_64
];
2634 rc
= read(fd
, buf
, sizeof(buf
));
2636 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2639 hdr
= (struct kvm_get_htab_header
*)buf
;
2640 while ((i
< n
) && ((char *)hdr
< (buf
+ rc
))) {
2641 int invalid
= hdr
->n_invalid
, valid
= hdr
->n_valid
;
2643 if (hdr
->index
!= (ptex
+ i
)) {
2644 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2645 " != (%"HWADDR_PRIu
" + %d", hdr
->index
, ptex
, i
);
2648 if (n
- i
< valid
) {
2651 memcpy(hptes
+ i
, hdr
+ 1, HASH_PTE_SIZE_64
* valid
);
2654 if ((n
- i
) < invalid
) {
2657 memset(hptes
+ i
, 0, invalid
* HASH_PTE_SIZE_64
);
2660 hdr
= (struct kvm_get_htab_header
*)
2661 ((char *)(hdr
+ 1) + HASH_PTE_SIZE_64
* hdr
->n_valid
);
2668 void kvmppc_write_hpte(hwaddr ptex
, uint64_t pte0
, uint64_t pte1
)
2672 struct kvm_get_htab_header hdr
;
2677 fd
= kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort
);
2679 buf
.hdr
.n_valid
= 1;
2680 buf
.hdr
.n_invalid
= 0;
2681 buf
.hdr
.index
= ptex
;
2682 buf
.pte0
= cpu_to_be64(pte0
);
2683 buf
.pte1
= cpu_to_be64(pte1
);
2685 rc
= write(fd
, &buf
, sizeof(buf
));
2686 if (rc
!= sizeof(buf
)) {
2687 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2692 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2693 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
2698 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry
*route
,
2699 int vector
, PCIDevice
*dev
)
2704 int kvm_arch_release_virq_post(int virq
)
/*
 * Map MSI data to a GSI number: the result is the low 16 bits of data.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2714 int kvmppc_enable_hwrng(void)
2716 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2720 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);
2723 void kvmppc_check_papr_resize_hpt(Error
**errp
)
2725 if (!kvm_enabled()) {
2726 return; /* No KVM, we're good */
2729 if (cap_resize_hpt
) {
2730 return; /* Kernel has explicit support, we're good */
2733 /* Otherwise fallback on looking for PR KVM */
2734 if (kvmppc_is_pr(kvm_state
)) {
2739 "Hash page table resizing not available with this KVM version");
2742 int kvmppc_resize_hpt_prepare(PowerPCCPU
*cpu
, target_ulong flags
, int shift
)
2744 CPUState
*cs
= CPU(cpu
);
2745 struct kvm_ppc_resize_hpt rhpt
= {
2750 if (!cap_resize_hpt
) {
2754 return kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_RESIZE_HPT_PREPARE
, &rhpt
);
2757 int kvmppc_resize_hpt_commit(PowerPCCPU
*cpu
, target_ulong flags
, int shift
)
2759 CPUState
*cs
= CPU(cpu
);
2760 struct kvm_ppc_resize_hpt rhpt
= {
2765 if (!cap_resize_hpt
) {
2769 return kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_RESIZE_HPT_COMMIT
, &rhpt
);
2773 * This is a helper function to detect a post migration scenario
2774 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2775 * the guest kernel can't handle a PVR value other than the actual host
2776 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2778 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2779 * (so, we're HV), return true. The workaround itself is done in
2782 * The order here is important: we'll only check for KVM PR as a
2783 * fallback if the guest kernel can't handle the situation itself.
2784 * We need to avoid as much as possible querying the running KVM type
2787 bool kvmppc_pvr_workaround_required(PowerPCCPU
*cpu
)
2789 CPUState
*cs
= CPU(cpu
);
2791 if (!kvm_enabled()) {
2795 if (cap_ppc_pvr_compat
) {
2799 return !kvmppc_is_pr(cs
->kvm_state
);
2802 void kvmppc_set_reg_ppc_online(PowerPCCPU
*cpu
, unsigned int online
)
2804 CPUState
*cs
= CPU(cpu
);
2806 if (kvm_enabled()) {
2807 kvm_set_one_reg(cs
, KVM_REG_PPC_ONLINE
, &online
);