2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
/* Debug printf: compiled to a no-op unless DEBUG_KVM is defined. */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
58 static int cap_interrupt_unset
= false;
59 static int cap_interrupt_level
= false;
60 static int cap_segstate
;
61 static int cap_booke_sregs
;
62 static int cap_ppc_smt
;
63 static int cap_ppc_rma
;
64 static int cap_spapr_tce
;
65 static int cap_spapr_multitce
;
66 static int cap_spapr_vfio
;
68 static int cap_one_reg
;
70 static int cap_ppc_watchdog
;
72 static int cap_htab_fd
;
73 static int cap_fixup_hcalls
;
75 /* XXX We have a race condition where we actually have a level triggered
76 * interrupt, but the infrastructure can't expose that yet, so the guest
77 * takes but ignores it, goes to sleep and never gets notified that there's
78 * still an interrupt pending.
80 * As a quick workaround, let's just wake up again 20 ms after we injected
81 * an interrupt. That way we can assure that we're always reinjecting
82 * interrupts in case the guest swallowed them.
84 static QEMUTimer
*idle_timer
;
86 static void kvm_kick_cpu(void *opaque
)
88 PowerPCCPU
*cpu
= opaque
;
90 qemu_cpu_kick(CPU(cpu
));
93 static int kvm_ppc_register_host_cpu_type(void);
95 int kvm_arch_init(KVMState
*s
)
97 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
98 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
99 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
100 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
101 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
102 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
103 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
104 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
105 cap_spapr_vfio
= false;
106 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
107 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
108 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
109 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
110 /* Note: we don't set cap_papr here, because this capability is
111 * only activated after this by kvmppc_set_papr() */
112 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
113 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
115 if (!cap_interrupt_level
) {
116 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
117 "VM to stall at times!\n");
120 kvm_ppc_register_host_cpu_type();
125 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
127 CPUPPCState
*cenv
= &cpu
->env
;
128 CPUState
*cs
= CPU(cpu
);
129 struct kvm_sregs sregs
;
132 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
133 /* What we're really trying to say is "if we're on BookE, we use
134 the native PVR for now". This is the only sane way to check
135 it though, so we potentially confuse users that they can run
136 BookE guests on BookS. Let's hope nobody dares enough :) */
140 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
145 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
150 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
151 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
154 /* Set up a shared TLB array with KVM */
155 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
157 CPUPPCState
*env
= &cpu
->env
;
158 CPUState
*cs
= CPU(cpu
);
159 struct kvm_book3e_206_tlb_params params
= {};
160 struct kvm_config_tlb cfg
= {};
161 unsigned int entries
= 0;
164 if (!kvm_enabled() ||
165 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
169 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
171 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
172 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
173 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
174 entries
+= params
.tlb_sizes
[i
];
177 assert(entries
== env
->nb_tlb
);
178 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
180 env
->tlb_dirty
= true;
182 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
183 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
184 cfg
.params
= (uintptr_t)¶ms
;
185 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
187 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
189 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
190 __func__
, strerror(-ret
));
194 env
->kvm_sw_tlb
= true;
199 #if defined(TARGET_PPC64)
200 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
201 struct kvm_ppc_smmu_info
*info
)
203 CPUPPCState
*env
= &cpu
->env
;
204 CPUState
*cs
= CPU(cpu
);
206 memset(info
, 0, sizeof(*info
));
208 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
209 * need to "guess" what the supported page sizes are.
211 * For that to work we make a few assumptions:
213 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
214 * KVM which only supports 4K and 16M pages, but supports them
215 * regardless of the backing store characteritics. We also don't
216 * support 1T segments.
218 * This is safe as if HV KVM ever supports that capability or PR
219 * KVM grows supports for more page/segment sizes, those versions
220 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
221 * will not hit this fallback
223 * - Else we are running HV KVM. This means we only support page
224 * sizes that fit in the backing store. Additionally we only
225 * advertize 64K pages if the processor is ARCH 2.06 and we assume
226 * P7 encodings for the SLB and hash table. Here too, we assume
227 * support for any newer processor will mean a kernel that
228 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
231 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
236 /* Standard 4k base page size segment */
237 info
->sps
[0].page_shift
= 12;
238 info
->sps
[0].slb_enc
= 0;
239 info
->sps
[0].enc
[0].page_shift
= 12;
240 info
->sps
[0].enc
[0].pte_enc
= 0;
242 /* Standard 16M large page size segment */
243 info
->sps
[1].page_shift
= 24;
244 info
->sps
[1].slb_enc
= SLB_VSID_L
;
245 info
->sps
[1].enc
[0].page_shift
= 24;
246 info
->sps
[1].enc
[0].pte_enc
= 0;
250 /* HV KVM has backing store size restrictions */
251 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
253 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
254 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
257 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
263 /* Standard 4k base page size segment */
264 info
->sps
[i
].page_shift
= 12;
265 info
->sps
[i
].slb_enc
= 0;
266 info
->sps
[i
].enc
[0].page_shift
= 12;
267 info
->sps
[i
].enc
[0].pte_enc
= 0;
270 /* 64K on MMU 2.06 */
271 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
272 info
->sps
[i
].page_shift
= 16;
273 info
->sps
[i
].slb_enc
= 0x110;
274 info
->sps
[i
].enc
[0].page_shift
= 16;
275 info
->sps
[i
].enc
[0].pte_enc
= 1;
279 /* Standard 16M large page size segment */
280 info
->sps
[i
].page_shift
= 24;
281 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
282 info
->sps
[i
].enc
[0].page_shift
= 24;
283 info
->sps
[i
].enc
[0].pte_enc
= 0;
287 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
289 CPUState
*cs
= CPU(cpu
);
292 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
293 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
299 kvm_get_fallback_smmu_info(cpu
, info
);
302 static long getrampagesize(void)
308 /* guest RAM is backed by normal anonymous pages */
309 return getpagesize();
313 ret
= statfs(mem_path
, &fs
);
314 } while (ret
!= 0 && errno
== EINTR
);
317 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
322 #define HUGETLBFS_MAGIC 0x958458f6
324 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
325 /* Explicit mempath, but it's ordinary pages */
326 return getpagesize();
329 /* It's hugepage, return the huge page size */
333 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
335 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
339 return (1ul << shift
) <= rampgsize
;
342 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
344 static struct kvm_ppc_smmu_info smmu_info
;
345 static bool has_smmu_info
;
346 CPUPPCState
*env
= &cpu
->env
;
350 /* We only handle page sizes for 64-bit server guests for now */
351 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
355 /* Collect MMU info from kernel if not already */
356 if (!has_smmu_info
) {
357 kvm_get_smmu_info(cpu
, &smmu_info
);
358 has_smmu_info
= true;
361 rampagesize
= getrampagesize();
363 /* Convert to QEMU form */
364 memset(&env
->sps
, 0, sizeof(env
->sps
));
367 * XXX This loop should be an entry wide AND of the capabilities that
368 * the selected CPU has with the capabilities that KVM supports.
370 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
371 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
372 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
374 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
378 qsps
->page_shift
= ksps
->page_shift
;
379 qsps
->slb_enc
= ksps
->slb_enc
;
380 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
381 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
382 ksps
->enc
[jk
].page_shift
)) {
385 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
386 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
387 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
391 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
395 env
->slb_nr
= smmu_info
.slb_size
;
396 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
397 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
400 #else /* defined (TARGET_PPC64) */
402 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
406 #endif /* !defined (TARGET_PPC64) */
408 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
410 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
413 int kvm_arch_init_vcpu(CPUState
*cs
)
415 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
416 CPUPPCState
*cenv
= &cpu
->env
;
419 /* Gather server mmu info from KVM and update the CPU state */
420 kvm_fixup_page_sizes(cpu
);
422 /* Synchronize sregs with kvm */
423 ret
= kvm_arch_sync_sregs(cpu
);
428 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
430 /* Some targets support access to KVM's guest TLB. */
431 switch (cenv
->mmu_model
) {
432 case POWERPC_MMU_BOOKE206
:
433 ret
= kvm_booke206_tlb_init(cpu
);
442 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
444 CPUPPCState
*env
= &cpu
->env
;
445 CPUState
*cs
= CPU(cpu
);
446 struct kvm_dirty_tlb dirty_tlb
;
447 unsigned char *bitmap
;
450 if (!env
->kvm_sw_tlb
) {
454 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
455 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
457 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
458 dirty_tlb
.num_dirty
= env
->nb_tlb
;
460 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
462 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
463 __func__
, strerror(-ret
));
469 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
471 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
472 CPUPPCState
*env
= &cpu
->env
;
477 struct kvm_one_reg reg
= {
479 .addr
= (uintptr_t) &val
,
483 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
485 trace_kvm_failed_spr_get(spr
, strerror(errno
));
487 switch (id
& KVM_REG_SIZE_MASK
) {
488 case KVM_REG_SIZE_U32
:
489 env
->spr
[spr
] = val
.u32
;
492 case KVM_REG_SIZE_U64
:
493 env
->spr
[spr
] = val
.u64
;
497 /* Don't handle this size yet */
503 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
505 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
506 CPUPPCState
*env
= &cpu
->env
;
511 struct kvm_one_reg reg
= {
513 .addr
= (uintptr_t) &val
,
517 switch (id
& KVM_REG_SIZE_MASK
) {
518 case KVM_REG_SIZE_U32
:
519 val
.u32
= env
->spr
[spr
];
522 case KVM_REG_SIZE_U64
:
523 val
.u64
= env
->spr
[spr
];
527 /* Don't handle this size yet */
531 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
533 trace_kvm_failed_spr_set(spr
, strerror(errno
));
537 static int kvm_put_fp(CPUState
*cs
)
539 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
540 CPUPPCState
*env
= &cpu
->env
;
541 struct kvm_one_reg reg
;
545 if (env
->insns_flags
& PPC_FLOAT
) {
546 uint64_t fpscr
= env
->fpscr
;
547 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
549 reg
.id
= KVM_REG_PPC_FPSCR
;
550 reg
.addr
= (uintptr_t)&fpscr
;
551 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
553 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
557 for (i
= 0; i
< 32; i
++) {
560 vsr
[0] = float64_val(env
->fpr
[i
]);
561 vsr
[1] = env
->vsr
[i
];
562 reg
.addr
= (uintptr_t) &vsr
;
563 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
565 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
567 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
574 if (env
->insns_flags
& PPC_ALTIVEC
) {
575 reg
.id
= KVM_REG_PPC_VSCR
;
576 reg
.addr
= (uintptr_t)&env
->vscr
;
577 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
579 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
583 for (i
= 0; i
< 32; i
++) {
584 reg
.id
= KVM_REG_PPC_VR(i
);
585 reg
.addr
= (uintptr_t)&env
->avr
[i
];
586 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
588 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
597 static int kvm_get_fp(CPUState
*cs
)
599 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
600 CPUPPCState
*env
= &cpu
->env
;
601 struct kvm_one_reg reg
;
605 if (env
->insns_flags
& PPC_FLOAT
) {
607 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
609 reg
.id
= KVM_REG_PPC_FPSCR
;
610 reg
.addr
= (uintptr_t)&fpscr
;
611 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
613 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
619 for (i
= 0; i
< 32; i
++) {
622 reg
.addr
= (uintptr_t) &vsr
;
623 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
625 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
627 DPRINTF("Unable to get %s%d from KVM: %s\n",
628 vsx
? "VSR" : "FPR", i
, strerror(errno
));
631 env
->fpr
[i
] = vsr
[0];
633 env
->vsr
[i
] = vsr
[1];
639 if (env
->insns_flags
& PPC_ALTIVEC
) {
640 reg
.id
= KVM_REG_PPC_VSCR
;
641 reg
.addr
= (uintptr_t)&env
->vscr
;
642 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
644 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
648 for (i
= 0; i
< 32; i
++) {
649 reg
.id
= KVM_REG_PPC_VR(i
);
650 reg
.addr
= (uintptr_t)&env
->avr
[i
];
651 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
653 DPRINTF("Unable to get VR%d from KVM: %s\n",
663 #if defined(TARGET_PPC64)
664 static int kvm_get_vpa(CPUState
*cs
)
666 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
667 CPUPPCState
*env
= &cpu
->env
;
668 struct kvm_one_reg reg
;
671 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
672 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
673 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
675 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
679 assert((uintptr_t)&env
->slb_shadow_size
680 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
681 reg
.id
= KVM_REG_PPC_VPA_SLB
;
682 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
683 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
685 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
690 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
691 reg
.id
= KVM_REG_PPC_VPA_DTL
;
692 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
693 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
695 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
703 static int kvm_put_vpa(CPUState
*cs
)
705 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
706 CPUPPCState
*env
= &cpu
->env
;
707 struct kvm_one_reg reg
;
710 /* SLB shadow or DTL can't be registered unless a master VPA is
711 * registered. That means when restoring state, if a VPA *is*
712 * registered, we need to set that up first. If not, we need to
713 * deregister the others before deregistering the master VPA */
714 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
717 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
718 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
719 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
721 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
726 assert((uintptr_t)&env
->slb_shadow_size
727 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
728 reg
.id
= KVM_REG_PPC_VPA_SLB
;
729 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
730 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
732 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
736 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
737 reg
.id
= KVM_REG_PPC_VPA_DTL
;
738 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
739 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
741 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
746 if (!env
->vpa_addr
) {
747 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
748 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
749 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
751 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
758 #endif /* TARGET_PPC64 */
760 int kvm_arch_put_registers(CPUState
*cs
, int level
)
762 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
763 CPUPPCState
*env
= &cpu
->env
;
764 struct kvm_regs regs
;
768 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
775 regs
.xer
= cpu_read_xer(env
);
779 regs
.srr0
= env
->spr
[SPR_SRR0
];
780 regs
.srr1
= env
->spr
[SPR_SRR1
];
782 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
783 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
784 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
785 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
786 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
787 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
788 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
789 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
791 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
793 for (i
= 0;i
< 32; i
++)
794 regs
.gpr
[i
] = env
->gpr
[i
];
797 for (i
= 0; i
< 8; i
++) {
798 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
801 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
807 if (env
->tlb_dirty
) {
809 env
->tlb_dirty
= false;
812 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
813 struct kvm_sregs sregs
;
815 sregs
.pvr
= env
->spr
[SPR_PVR
];
817 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
821 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
822 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
823 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
824 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
826 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
831 for (i
= 0; i
< 16; i
++) {
832 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
836 for (i
= 0; i
< 8; i
++) {
837 /* Beware. We have to swap upper and lower bits here */
838 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
840 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
844 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
850 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
851 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
857 /* We deliberately ignore errors here, for kernels which have
858 * the ONE_REG calls, but don't support the specific
859 * registers, there's a reasonable chance things will still
860 * work, at least until we try to migrate. */
861 for (i
= 0; i
< 1024; i
++) {
862 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
865 kvm_put_one_spr(cs
, id
, i
);
871 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
872 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
874 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
875 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
877 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
878 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
879 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
880 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
881 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
882 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
883 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
884 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
885 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
886 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
890 if (kvm_put_vpa(cs
) < 0) {
891 DPRINTF("Warning: Unable to set VPA information to KVM\n");
895 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
896 #endif /* TARGET_PPC64 */
902 int kvm_arch_get_registers(CPUState
*cs
)
904 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
905 CPUPPCState
*env
= &cpu
->env
;
906 struct kvm_regs regs
;
907 struct kvm_sregs sregs
;
911 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
916 for (i
= 7; i
>= 0; i
--) {
917 env
->crf
[i
] = cr
& 15;
923 cpu_write_xer(env
, regs
.xer
);
927 env
->spr
[SPR_SRR0
] = regs
.srr0
;
928 env
->spr
[SPR_SRR1
] = regs
.srr1
;
930 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
931 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
932 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
933 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
934 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
935 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
936 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
937 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
939 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
941 for (i
= 0;i
< 32; i
++)
942 env
->gpr
[i
] = regs
.gpr
[i
];
946 if (cap_booke_sregs
) {
947 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
952 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
953 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
954 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
955 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
956 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
957 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
958 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
959 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
960 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
961 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
962 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
963 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
966 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
967 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
968 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
969 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
970 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
971 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
974 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
975 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
978 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
979 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
982 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
983 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
984 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
985 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
986 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
987 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
988 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
989 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
990 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
991 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
992 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
993 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
994 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
995 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
996 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
997 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
998 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1000 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1001 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1002 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1003 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1006 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1007 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1010 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1011 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1012 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1016 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1017 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1018 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1019 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1020 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1021 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1022 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1023 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1024 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1025 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1026 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1029 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1030 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1033 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1034 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1035 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1038 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1039 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1040 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1041 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1043 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1044 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1045 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1051 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1056 if (!env
->external_htab
) {
1057 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1063 * The packed SLB array we get from KVM_GET_SREGS only contains
1064 * information about valid entries. So we flush our internal
1065 * copy to get rid of stale ones, then put all valid SLB entries
1068 memset(env
->slb
, 0, sizeof(env
->slb
));
1069 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1070 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1071 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1073 * Only restore valid entries
1075 if (rb
& SLB_ESID_V
) {
1076 ppc_store_slb(env
, rb
, rs
);
1082 for (i
= 0; i
< 16; i
++) {
1083 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1087 for (i
= 0; i
< 8; i
++) {
1088 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1089 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1090 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1091 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1096 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1102 /* We deliberately ignore errors here, for kernels which have
1103 * the ONE_REG calls, but don't support the specific
1104 * registers, there's a reasonable chance things will still
1105 * work, at least until we try to migrate. */
1106 for (i
= 0; i
< 1024; i
++) {
1107 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1110 kvm_get_one_spr(cs
, id
, i
);
1116 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1117 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1119 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1120 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1122 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1123 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1124 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1125 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1126 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1127 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1128 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1129 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1130 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1131 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1135 if (kvm_get_vpa(cs
) < 0) {
1136 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1140 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1147 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1149 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1151 if (irq
!= PPC_INTERRUPT_EXT
) {
1155 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1159 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
/* Select the per-family external interrupt input pin. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1172 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1174 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1175 CPUPPCState
*env
= &cpu
->env
;
1179 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1180 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1181 if (!cap_interrupt_level
&&
1182 run
->ready_for_interrupt_injection
&&
1183 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1184 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1186 /* For now KVM disregards the 'irq' argument. However, in the
1187 * future KVM could cache it in-kernel to avoid a heavyweight exit
1188 * when reading the UIC.
1190 irq
= KVM_INTERRUPT_SET
;
1192 DPRINTF("injected interrupt %d\n", irq
);
1193 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1195 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1198 /* Always wake up soon in case the interrupt was level based */
1199 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1200 (get_ticks_per_sec() / 50));
1203 /* We don't know if there are more interrupts pending after this. However,
1204 * the guest will return to userspace in the course of handling this one
1205 * anyways, so we will get a chance to deliver the rest. */
1208 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1212 int kvm_arch_process_async_events(CPUState
*cs
)
1217 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1219 CPUState
*cs
= CPU(cpu
);
1220 CPUPPCState
*env
= &cpu
->env
;
1222 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1224 cs
->exception_index
= EXCP_HLT
;
1230 /* map dcr access to existing qemu dcr emulation */
1231 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1233 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1234 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1239 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1241 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1242 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1247 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1249 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1250 CPUPPCState
*env
= &cpu
->env
;
1253 switch (run
->exit_reason
) {
1255 if (run
->dcr
.is_write
) {
1256 DPRINTF("handle dcr write\n");
1257 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1259 DPRINTF("handle dcr read\n");
1260 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1264 DPRINTF("handle halt\n");
1265 ret
= kvmppc_handle_halt(cpu
);
1267 #if defined(TARGET_PPC64)
1268 case KVM_EXIT_PAPR_HCALL
:
1269 DPRINTF("handle PAPR hypercall\n");
1270 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1272 run
->papr_hcall
.args
);
1277 DPRINTF("handle epr\n");
1278 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1281 case KVM_EXIT_WATCHDOG
:
1282 DPRINTF("handle watchdog expiry\n");
1283 watchdog_perform_action();
1288 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1296 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1298 CPUState
*cs
= CPU(cpu
);
1299 uint32_t bits
= tsr_bits
;
1300 struct kvm_one_reg reg
= {
1301 .id
= KVM_REG_PPC_OR_TSR
,
1302 .addr
= (uintptr_t) &bits
,
1305 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1308 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1311 CPUState
*cs
= CPU(cpu
);
1312 uint32_t bits
= tsr_bits
;
1313 struct kvm_one_reg reg
= {
1314 .id
= KVM_REG_PPC_CLEAR_TSR
,
1315 .addr
= (uintptr_t) &bits
,
1318 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1321 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1323 CPUState
*cs
= CPU(cpu
);
1324 CPUPPCState
*env
= &cpu
->env
;
1325 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1327 struct kvm_one_reg reg
= {
1328 .id
= KVM_REG_PPC_TCR
,
1329 .addr
= (uintptr_t) &tcr
,
1332 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1335 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1337 CPUState
*cs
= CPU(cpu
);
1340 if (!kvm_enabled()) {
1344 if (!cap_ppc_watchdog
) {
1345 printf("warning: KVM does not support watchdog");
1349 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1351 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1352 __func__
, strerror(-ret
));
/*
 * Scan /proc/cpuinfo for a line starting with @field and copy it into
 * @value (at most @len bytes, NUL-terminated by pstrcpy).
 * Returns 0 when the field was found, -1 otherwise.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return ret;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
/*
 * Query the host timebase frequency from /proc/cpuinfo; fall back to
 * QEMU's tick rate when the "timebase" line can't be found or parsed.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    /* Skip the ':' separator and parse the frequency value. */
    ns++;

    retval = atoi(ns);
    return retval;
}
1407 /* Try to find a device tree node for a CPU with clock-frequency property */
1408 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1410 struct dirent
*dirp
;
1413 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1414 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1419 while ((dirp
= readdir(dp
)) != NULL
) {
1421 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1423 f
= fopen(buf
, "r");
1425 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1432 if (buf
[0] == '\0') {
1433 printf("Unknown host!\n");
1440 /* Read a CPU node property from the host device tree that's a single
1441 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1442 * (can't find or open the property, or doesn't understand the
1444 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1454 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1458 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
1459 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
1461 f
= fopen(buf
, "rb");
1466 len
= fread(&u
, 1, sizeof(u
), f
);
1470 /* property is a 32-bit quantity */
1471 return be32_to_cpu(u
.v32
);
1473 return be64_to_cpu(u
.v64
);
/* Host CPU clock frequency from the device tree. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host AltiVec/VSX capability level from the device tree. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host decimal-floating-point capability from the device tree. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1494 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1496 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1497 CPUState
*cs
= CPU(cpu
);
1499 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1500 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1507 int kvmppc_get_hasidle(CPUPPCState
*env
)
1509 struct kvm_ppc_pvinfo pvinfo
;
1511 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1512 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1519 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1521 uint32_t *hc
= (uint32_t*)buf
;
1522 struct kvm_ppc_pvinfo pvinfo
;
1524 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1525 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1530 * Fallback to always fail hypercalls regardless of endianness:
1532 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1534 * b .+8 (becomes nop in wrong endian)
1535 * bswap32(li r3, -1)
1538 hc
[0] = cpu_to_be32(0x08000048);
1539 hc
[1] = cpu_to_be32(0x3860ffff);
1540 hc
[2] = cpu_to_be32(0x48000008);
1541 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
1546 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1548 CPUState
*cs
= CPU(cpu
);
1551 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
1553 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1556 /* Update the capability flag so we sync the right information
1561 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
1563 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
1566 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1568 CPUState
*cs
= CPU(cpu
);
1571 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
1572 if (ret
&& mpic_proxy
) {
1573 cpu_abort(cs
, "This KVM version does not support EPR\n");
1577 int kvmppc_smt_threads(void)
1579 return cap_ppc_smt
? cap_ppc_smt
: 1;
1583 off_t
kvmppc_alloc_rma(void **rma
)
1587 struct kvm_allocate_rma ret
;
1589 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1590 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1591 * not necessary on this hardware
1592 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1594 * FIXME: We should allow the user to force contiguous RMA
1595 * allocation in the cap_ppc_rma==1 case.
1597 if (cap_ppc_rma
< 2) {
1601 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1603 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1608 size
= MIN(ret
.rma_size
, 256ul << 20);
1610 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1611 if (*rma
== MAP_FAILED
) {
1612 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1619 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1621 struct kvm_ppc_smmu_info info
;
1622 long rampagesize
, best_page_shift
;
1625 if (cap_ppc_rma
>= 2) {
1626 return current_size
;
1629 /* Find the largest hardware supported page size that's less than
1630 * or equal to the (logical) backing page size of guest RAM */
1631 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1632 rampagesize
= getrampagesize();
1633 best_page_shift
= 0;
1635 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1636 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
1638 if (!sps
->page_shift
) {
1642 if ((sps
->page_shift
> best_page_shift
)
1643 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1644 best_page_shift
= sps
->page_shift
;
1648 return MIN(current_size
,
1649 1ULL << (best_page_shift
+ hash_shift
- 7));
1653 bool kvmppc_spapr_use_multitce(void)
1655 return cap_spapr_multitce
;
1658 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
1661 struct kvm_create_spapr_tce args
= {
1663 .window_size
= window_size
,
1669 /* Must set fd to -1 so we don't try to munmap when called for
1670 * destroying the table, which the upper layers -will- do
1673 if (!cap_spapr_tce
|| (vfio_accel
&& !cap_spapr_vfio
)) {
1677 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1679 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1684 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
1685 /* FIXME: round this up to page size */
1687 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1688 if (table
== MAP_FAILED
) {
1689 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
/*
 * Unmap a TCE table previously created by kvmppc_create_spapr_tce and
 * close its fd.  A negative fd means there is nothing to tear down.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Fix: the error message was missing its trailing newline. */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
1718 int kvmppc_reset_htab(int shift_hint
)
1720 uint32_t shift
= shift_hint
;
1722 if (!kvm_enabled()) {
1723 /* Full emulation, tell caller to allocate htab itself */
1726 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1728 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1729 if (ret
== -ENOTTY
) {
1730 /* At least some versions of PR KVM advertise the
1731 * capability, but don't implement the ioctl(). Oops.
1732 * Return 0 so that we allocate the htab in qemu, as is
1733 * correct for PR. */
1735 } else if (ret
< 0) {
1741 /* We have a kernel that predates the htab reset calls. For PR
1742 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1743 * this era, it has allocated a 16MB fixed size hash table
1744 * already. Kernels of this era have the GET_PVINFO capability
1745 * only on PR, so we use this hack to determine the right
1747 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1748 /* PR - tell caller to allocate htab */
1751 /* HV - assume 16MB kernel allocated htab */
/* Read the host Processor Version Register.
 * NOTE(review): the body (original lines after 1756) is not visible in this
 * chunk — presumably inline asm reading SPR PVR; confirm upstream. */
1756 static inline uint32_t mfpvr(void)
/* Set or clear instruction-flag bits @flags in *@word depending on @on.
 * NOTE(review): the body is not visible in this chunk — confirm upstream. */
1765 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
1774 static void kvmppc_host_cpu_initfn(Object
*obj
)
1776 assert(kvm_enabled());
1779 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1781 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1782 uint32_t vmx
= kvmppc_get_vmx();
1783 uint32_t dfp
= kvmppc_get_dfp();
1784 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1785 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1787 /* Now fix up the class with information we can query from the host */
1791 /* Only override when we know what the host supports */
1792 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1793 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1796 /* Only override when we know what the host supports */
1797 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
1800 if (dcache_size
!= -1) {
1801 pcc
->l1_dcache_size
= dcache_size
;
1804 if (icache_size
!= -1) {
1805 pcc
->l1_icache_size
= icache_size
;
/* Capability queries.
 * NOTE(review): the one-line bodies are not visible in this chunk —
 * presumably `return cap_epr;` / `return cap_htab_fd;` matching the
 * file-scope flags declared at the top of the file; confirm upstream. */
1809 bool kvmppc_has_cap_epr(void)
1814 bool kvmppc_has_cap_htab_fd(void)
1819 bool kvmppc_has_cap_fixup_hcalls(void)
1821 return cap_fixup_hcalls
;
1824 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
1826 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
1828 while (oc
&& !object_class_is_abstract(oc
)) {
1829 oc
= object_class_get_parent(oc
);
1833 return POWERPC_CPU_CLASS(oc
);
1836 static int kvm_ppc_register_host_cpu_type(void)
1838 TypeInfo type_info
= {
1839 .name
= TYPE_HOST_POWERPC_CPU
,
1840 .instance_init
= kvmppc_host_cpu_initfn
,
1841 .class_init
= kvmppc_host_cpu_class_init
,
1843 uint32_t host_pvr
= mfpvr();
1844 PowerPCCPUClass
*pvr_pcc
;
1847 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1848 if (pvr_pcc
== NULL
) {
1849 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
1851 if (pvr_pcc
== NULL
) {
1854 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1855 type_register(&type_info
);
1857 /* Register generic family CPU class for a family */
1858 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
1859 dc
= DEVICE_CLASS(pvr_pcc
);
1860 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1861 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
1862 type_register(&type_info
);
1867 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
1869 struct kvm_rtas_token_args args
= {
1873 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
1877 strncpy(args
.name
, function
, sizeof(args
.name
));
1879 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
1882 int kvmppc_get_htab_fd(bool write
)
1884 struct kvm_get_htab_fd s
= {
1885 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
1890 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
1894 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
1897 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
1899 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
1900 uint8_t buf
[bufsize
];
1904 rc
= read(fd
, buf
, bufsize
);
1906 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
1910 /* Kernel already retuns data in BE format for the file */
1911 qemu_put_buffer(f
, buf
, rc
);
1915 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
1917 return (rc
== 0) ? 1 : 0;
1920 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
1921 uint16_t n_valid
, uint16_t n_invalid
)
1923 struct kvm_get_htab_header
*buf
;
1924 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
1927 buf
= alloca(chunksize
);
1928 /* This is KVM on ppc, so this is all big-endian */
1930 buf
->n_valid
= n_valid
;
1931 buf
->n_invalid
= n_invalid
;
1933 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
1935 rc
= write(fd
, buf
, chunksize
);
1937 fprintf(stderr
, "Error writing KVM hash table: %s\n",
1941 if (rc
!= chunksize
) {
1942 /* We should never get a short write on a single chunk */
1943 fprintf(stderr
, "Short write, restoring KVM hash table\n");
/* Architecture hook stubs: sigbus handling, IRQ routing init and the
 * guest-debug breakpoint interface, none of which is implemented for PPC.
 * NOTE(review): the (trivial) bodies are not visible in this chunk —
 * presumably fixed returns / empty bodies; confirm upstream. */
1949 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
1954 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
1959 int kvm_arch_on_sigbus(int code
, void *addr
)
1964 void kvm_arch_init_irq_routing(KVMState
*s
)
1968 int kvm_arch_insert_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1973 int kvm_arch_remove_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1978 int kvm_arch_insert_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1983 int kvm_arch_remove_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1988 void kvm_arch_remove_all_hw_breakpoints(void)
1992 void kvm_arch_update_guest_debug(CPUState
*cpu
, struct kvm_guest_debug
*dbg
)
1996 struct kvm_get_htab_buf
{
1997 struct kvm_get_htab_header header
;
1999 * We require one extra byte for read
2001 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2004 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2007 struct kvm_get_htab_fd ghf
;
2008 struct kvm_get_htab_buf
*hpte_buf
;
2011 ghf
.start_index
= pte_index
;
2012 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2017 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2019 * Read the hpte group
2021 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2026 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2035 void kvmppc_hash64_free_pteg(uint64_t token
)
2037 struct kvm_get_htab_buf
*htab_buf
;
2039 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2045 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2046 target_ulong pte0
, target_ulong pte1
)
2049 struct kvm_get_htab_fd ghf
;
2050 struct kvm_get_htab_buf hpte_buf
;
2053 ghf
.start_index
= 0; /* Ignored */
2054 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2059 hpte_buf
.header
.n_valid
= 1;
2060 hpte_buf
.header
.n_invalid
= 0;
2061 hpte_buf
.header
.index
= pte_index
;
2062 hpte_buf
.hpte
[0] = pte0
;
2063 hpte_buf
.hpte
[1] = pte1
;
2065 * Write the hpte entry.
2066 * CAUTION: write() has the warn_unused_result attribute. Hence we
2067 * need to check the return value, even though we do nothing.
2069 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {