/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "sysemu/watchdog.h"
/* Uncomment to get debug traces from this file on stderr. */
/* #define DEBUG_KVM */

/*
 * dprintf(): debug trace helper. Prints to stderr when DEBUG_KVM is defined
 * and expands to an empty statement otherwise (arguments are not evaluated).
 * NOTE(review): the #ifdef/#else/#endif frame and the empty variant's body
 * were missing from the damaged listing and have been restored; confirm.
 */
#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

/* Directory holding one sub-directory per host CPU device-tree node. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
52 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
/*
 * Cached results of kvm_check_extension(), filled in by kvm_arch_init().
 * A non-zero value means the host kernel advertises the capability.
 */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
/* NOTE(review): cap_hior and cap_epr are assigned in kvm_arch_init() but
 * their declarations were missing from the damaged listing; restored. */
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
/* Not probed in kvm_arch_init(); set once PAPR mode has been enabled.
 * NOTE(review): declaration restored, it was missing from the listing. */
static int cap_papr;
69 /* XXX We have a race condition where we actually have a level triggered
70 * interrupt, but the infrastructure can't expose that yet, so the guest
71 * takes but ignores it, goes to sleep and never gets notified that there's
72 * still an interrupt pending.
74 * As a quick workaround, let's just wake up again 20 ms after we injected
75 * an interrupt. That way we can assure that we're always reinjecting
76 * interrupts in case the guest swallowed them.
78 static QEMUTimer
*idle_timer
;
80 static void kvm_kick_cpu(void *opaque
)
82 PowerPCCPU
*cpu
= opaque
;
84 qemu_cpu_kick(CPU(cpu
));
87 static int kvm_ppc_register_host_cpu_type(void);
89 int kvm_arch_init(KVMState
*s
)
91 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
92 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
93 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
94 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
95 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
96 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
97 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
98 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
99 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
100 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
101 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
102 /* Note: we don't set cap_papr here, because this capability is
103 * only activated after this by kvmppc_set_papr() */
105 if (!cap_interrupt_level
) {
106 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
107 "VM to stall at times!\n");
110 kvm_ppc_register_host_cpu_type();
115 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
117 CPUPPCState
*cenv
= &cpu
->env
;
118 CPUState
*cs
= CPU(cpu
);
119 struct kvm_sregs sregs
;
122 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
123 /* What we're really trying to say is "if we're on BookE, we use
124 the native PVR for now". This is the only sane way to check
125 it though, so we potentially confuse users that they can run
126 BookE guests on BookS. Let's hope nobody dares enough :) */
130 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
135 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
140 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
141 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
144 /* Set up a shared TLB array with KVM */
145 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
147 CPUPPCState
*env
= &cpu
->env
;
148 CPUState
*cs
= CPU(cpu
);
149 struct kvm_book3e_206_tlb_params params
= {};
150 struct kvm_config_tlb cfg
= {};
151 struct kvm_enable_cap encap
= {};
152 unsigned int entries
= 0;
155 if (!kvm_enabled() ||
156 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
160 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
162 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
163 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
164 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
165 entries
+= params
.tlb_sizes
[i
];
168 assert(entries
== env
->nb_tlb
);
169 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
171 env
->tlb_dirty
= true;
173 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
174 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
175 cfg
.params
= (uintptr_t)¶ms
;
176 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
178 encap
.cap
= KVM_CAP_SW_TLB
;
179 encap
.args
[0] = (uintptr_t)&cfg
;
181 ret
= kvm_vcpu_ioctl(cs
, KVM_ENABLE_CAP
, &encap
);
183 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
184 __func__
, strerror(-ret
));
188 env
->kvm_sw_tlb
= true;
193 #if defined(TARGET_PPC64)
194 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
195 struct kvm_ppc_smmu_info
*info
)
197 CPUPPCState
*env
= &cpu
->env
;
198 CPUState
*cs
= CPU(cpu
);
200 memset(info
, 0, sizeof(*info
));
202 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
203 * need to "guess" what the supported page sizes are.
205 * For that to work we make a few assumptions:
207 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
208 * KVM which only supports 4K and 16M pages, but supports them
209 * regardless of the backing store characteritics. We also don't
210 * support 1T segments.
212 * This is safe as if HV KVM ever supports that capability or PR
213 * KVM grows supports for more page/segment sizes, those versions
214 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
215 * will not hit this fallback
217 * - Else we are running HV KVM. This means we only support page
218 * sizes that fit in the backing store. Additionally we only
219 * advertize 64K pages if the processor is ARCH 2.06 and we assume
220 * P7 encodings for the SLB and hash table. Here too, we assume
221 * support for any newer processor will mean a kernel that
222 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
225 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
230 /* Standard 4k base page size segment */
231 info
->sps
[0].page_shift
= 12;
232 info
->sps
[0].slb_enc
= 0;
233 info
->sps
[0].enc
[0].page_shift
= 12;
234 info
->sps
[0].enc
[0].pte_enc
= 0;
236 /* Standard 16M large page size segment */
237 info
->sps
[1].page_shift
= 24;
238 info
->sps
[1].slb_enc
= SLB_VSID_L
;
239 info
->sps
[1].enc
[0].page_shift
= 24;
240 info
->sps
[1].enc
[0].pte_enc
= 0;
244 /* HV KVM has backing store size restrictions */
245 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
247 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
248 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
251 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
257 /* Standard 4k base page size segment */
258 info
->sps
[i
].page_shift
= 12;
259 info
->sps
[i
].slb_enc
= 0;
260 info
->sps
[i
].enc
[0].page_shift
= 12;
261 info
->sps
[i
].enc
[0].pte_enc
= 0;
264 /* 64K on MMU 2.06 */
265 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
266 info
->sps
[i
].page_shift
= 16;
267 info
->sps
[i
].slb_enc
= 0x110;
268 info
->sps
[i
].enc
[0].page_shift
= 16;
269 info
->sps
[i
].enc
[0].pte_enc
= 1;
273 /* Standard 16M large page size segment */
274 info
->sps
[i
].page_shift
= 24;
275 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
276 info
->sps
[i
].enc
[0].page_shift
= 24;
277 info
->sps
[i
].enc
[0].pte_enc
= 0;
281 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
283 CPUState
*cs
= CPU(cpu
);
286 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
287 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
293 kvm_get_fallback_smmu_info(cpu
, info
);
296 static long getrampagesize(void)
302 /* guest RAM is backed by normal anonymous pages */
303 return getpagesize();
307 ret
= statfs(mem_path
, &fs
);
308 } while (ret
!= 0 && errno
== EINTR
);
311 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
316 #define HUGETLBFS_MAGIC 0x958458f6
318 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
319 /* Explicit mempath, but it's ordinary pages */
320 return getpagesize();
323 /* It's hugepage, return the huge page size */
/*
 * Tell whether a page of 2^shift bytes may be offered to the guest.
 *
 * @flags:     KVM_PPC_* flags from the SMMU info.
 * @rampgsize: page size backing guest RAM, in bytes.
 * @shift:     log2 of the candidate page size.
 *
 * Without KVM_PPC_PAGE_SIZES_REAL every size is accepted. With it, the
 * candidate must not exceed the backing page size.
 */
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    /* Shifting by the type width or more is undefined; such a page could
     * never fit in the backing store anyway. */
    if (shift >= sizeof(unsigned long) * 8) {
        return false;
    }

    return (1ul << shift) <= (unsigned long)rampgsize;
}
336 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
338 static struct kvm_ppc_smmu_info smmu_info
;
339 static bool has_smmu_info
;
340 CPUPPCState
*env
= &cpu
->env
;
344 /* We only handle page sizes for 64-bit server guests for now */
345 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
349 /* Collect MMU info from kernel if not already */
350 if (!has_smmu_info
) {
351 kvm_get_smmu_info(cpu
, &smmu_info
);
352 has_smmu_info
= true;
355 rampagesize
= getrampagesize();
357 /* Convert to QEMU form */
358 memset(&env
->sps
, 0, sizeof(env
->sps
));
360 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
361 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
362 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
364 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
368 qsps
->page_shift
= ksps
->page_shift
;
369 qsps
->slb_enc
= ksps
->slb_enc
;
370 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
371 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
372 ksps
->enc
[jk
].page_shift
)) {
375 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
376 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
377 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
381 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
385 env
->slb_nr
= smmu_info
.slb_size
;
386 if (smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
) {
387 env
->mmu_model
|= POWERPC_MMU_1TSEG
;
389 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
392 #else /* defined (TARGET_PPC64) */
394 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
398 #endif /* !defined (TARGET_PPC64) */
400 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
402 return cpu
->cpu_index
;
405 int kvm_arch_init_vcpu(CPUState
*cs
)
407 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
408 CPUPPCState
*cenv
= &cpu
->env
;
411 /* Gather server mmu info from KVM and update the CPU state */
412 kvm_fixup_page_sizes(cpu
);
414 /* Synchronize sregs with kvm */
415 ret
= kvm_arch_sync_sregs(cpu
);
420 idle_timer
= qemu_new_timer_ns(vm_clock
, kvm_kick_cpu
, cpu
);
422 /* Some targets support access to KVM's guest TLB. */
423 switch (cenv
->mmu_model
) {
424 case POWERPC_MMU_BOOKE206
:
425 ret
= kvm_booke206_tlb_init(cpu
);
434 void kvm_arch_reset_vcpu(CPUState
*cpu
)
438 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
440 CPUPPCState
*env
= &cpu
->env
;
441 CPUState
*cs
= CPU(cpu
);
442 struct kvm_dirty_tlb dirty_tlb
;
443 unsigned char *bitmap
;
446 if (!env
->kvm_sw_tlb
) {
450 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
451 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
453 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
454 dirty_tlb
.num_dirty
= env
->nb_tlb
;
456 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
458 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
459 __func__
, strerror(-ret
));
465 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
467 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
468 CPUPPCState
*env
= &cpu
->env
;
473 struct kvm_one_reg reg
= {
475 .addr
= (uintptr_t) &val
,
479 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
481 fprintf(stderr
, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
482 spr
, strerror(errno
));
484 switch (id
& KVM_REG_SIZE_MASK
) {
485 case KVM_REG_SIZE_U32
:
486 env
->spr
[spr
] = val
.u32
;
489 case KVM_REG_SIZE_U64
:
490 env
->spr
[spr
] = val
.u64
;
494 /* Don't handle this size yet */
500 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
502 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
503 CPUPPCState
*env
= &cpu
->env
;
508 struct kvm_one_reg reg
= {
510 .addr
= (uintptr_t) &val
,
514 switch (id
& KVM_REG_SIZE_MASK
) {
515 case KVM_REG_SIZE_U32
:
516 val
.u32
= env
->spr
[spr
];
519 case KVM_REG_SIZE_U64
:
520 val
.u64
= env
->spr
[spr
];
524 /* Don't handle this size yet */
528 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
530 fprintf(stderr
, "Warning: Unable to set SPR %d to KVM: %s\n",
531 spr
, strerror(errno
));
535 static int kvm_put_fp(CPUState
*cs
)
537 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
538 CPUPPCState
*env
= &cpu
->env
;
539 struct kvm_one_reg reg
;
543 if (env
->insns_flags
& PPC_FLOAT
) {
544 uint64_t fpscr
= env
->fpscr
;
545 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
547 reg
.id
= KVM_REG_PPC_FPSCR
;
548 reg
.addr
= (uintptr_t)&fpscr
;
549 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
551 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
555 for (i
= 0; i
< 32; i
++) {
558 vsr
[0] = float64_val(env
->fpr
[i
]);
559 vsr
[1] = env
->vsr
[i
];
560 reg
.addr
= (uintptr_t) &vsr
;
561 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
563 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
565 dprintf("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
572 if (env
->insns_flags
& PPC_ALTIVEC
) {
573 reg
.id
= KVM_REG_PPC_VSCR
;
574 reg
.addr
= (uintptr_t)&env
->vscr
;
575 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
577 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno
));
581 for (i
= 0; i
< 32; i
++) {
582 reg
.id
= KVM_REG_PPC_VR(i
);
583 reg
.addr
= (uintptr_t)&env
->avr
[i
];
584 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
586 dprintf("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
595 static int kvm_get_fp(CPUState
*cs
)
597 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
598 CPUPPCState
*env
= &cpu
->env
;
599 struct kvm_one_reg reg
;
603 if (env
->insns_flags
& PPC_FLOAT
) {
605 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
607 reg
.id
= KVM_REG_PPC_FPSCR
;
608 reg
.addr
= (uintptr_t)&fpscr
;
609 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
611 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
617 for (i
= 0; i
< 32; i
++) {
620 reg
.addr
= (uintptr_t) &vsr
;
621 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
623 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
625 dprintf("Unable to get %s%d from KVM: %s\n",
626 vsx
? "VSR" : "FPR", i
, strerror(errno
));
629 env
->fpr
[i
] = vsr
[0];
631 env
->vsr
[i
] = vsr
[1];
637 if (env
->insns_flags
& PPC_ALTIVEC
) {
638 reg
.id
= KVM_REG_PPC_VSCR
;
639 reg
.addr
= (uintptr_t)&env
->vscr
;
640 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
642 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno
));
646 for (i
= 0; i
< 32; i
++) {
647 reg
.id
= KVM_REG_PPC_VR(i
);
648 reg
.addr
= (uintptr_t)&env
->avr
[i
];
649 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
651 dprintf("Unable to get VR%d from KVM: %s\n",
661 #if defined(TARGET_PPC64)
662 static int kvm_get_vpa(CPUState
*cs
)
664 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
665 CPUPPCState
*env
= &cpu
->env
;
666 struct kvm_one_reg reg
;
669 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
670 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
671 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
673 dprintf("Unable to get VPA address from KVM: %s\n", strerror(errno
));
677 assert((uintptr_t)&env
->slb_shadow_size
678 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
679 reg
.id
= KVM_REG_PPC_VPA_SLB
;
680 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
681 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
683 dprintf("Unable to get SLB shadow state from KVM: %s\n",
688 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
689 reg
.id
= KVM_REG_PPC_VPA_DTL
;
690 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
691 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
693 dprintf("Unable to get dispatch trace log state from KVM: %s\n",
701 static int kvm_put_vpa(CPUState
*cs
)
703 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
704 CPUPPCState
*env
= &cpu
->env
;
705 struct kvm_one_reg reg
;
708 /* SLB shadow or DTL can't be registered unless a master VPA is
709 * registered. That means when restoring state, if a VPA *is*
710 * registered, we need to set that up first. If not, we need to
711 * deregister the others before deregistering the master VPA */
712 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
715 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
716 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
717 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
719 dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno
));
724 assert((uintptr_t)&env
->slb_shadow_size
725 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
726 reg
.id
= KVM_REG_PPC_VPA_SLB
;
727 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
728 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
730 dprintf("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
734 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
735 reg
.id
= KVM_REG_PPC_VPA_DTL
;
736 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
737 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
739 dprintf("Unable to set dispatch trace log state to KVM: %s\n",
744 if (!env
->vpa_addr
) {
745 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
746 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
747 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
749 dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno
));
756 #endif /* TARGET_PPC64 */
758 int kvm_arch_put_registers(CPUState
*cs
, int level
)
760 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
761 CPUPPCState
*env
= &cpu
->env
;
762 struct kvm_regs regs
;
766 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
773 regs
.xer
= cpu_read_xer(env
);
777 regs
.srr0
= env
->spr
[SPR_SRR0
];
778 regs
.srr1
= env
->spr
[SPR_SRR1
];
780 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
781 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
782 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
783 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
784 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
785 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
786 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
787 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
789 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
791 for (i
= 0;i
< 32; i
++)
792 regs
.gpr
[i
] = env
->gpr
[i
];
794 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
800 if (env
->tlb_dirty
) {
802 env
->tlb_dirty
= false;
805 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
806 struct kvm_sregs sregs
;
808 sregs
.pvr
= env
->spr
[SPR_PVR
];
810 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
814 for (i
= 0; i
< 64; i
++) {
815 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
816 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
821 for (i
= 0; i
< 16; i
++) {
822 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
826 for (i
= 0; i
< 8; i
++) {
827 /* Beware. We have to swap upper and lower bits here */
828 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
830 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
834 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
840 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
841 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
847 /* We deliberately ignore errors here, for kernels which have
848 * the ONE_REG calls, but don't support the specific
849 * registers, there's a reasonable chance things will still
850 * work, at least until we try to migrate. */
851 for (i
= 0; i
< 1024; i
++) {
852 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
855 kvm_put_one_spr(cs
, id
, i
);
861 if (kvm_put_vpa(cs
) < 0) {
862 dprintf("Warning: Unable to set VPA information to KVM\n");
865 #endif /* TARGET_PPC64 */
871 int kvm_arch_get_registers(CPUState
*cs
)
873 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
874 CPUPPCState
*env
= &cpu
->env
;
875 struct kvm_regs regs
;
876 struct kvm_sregs sregs
;
880 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
885 for (i
= 7; i
>= 0; i
--) {
886 env
->crf
[i
] = cr
& 15;
892 cpu_write_xer(env
, regs
.xer
);
896 env
->spr
[SPR_SRR0
] = regs
.srr0
;
897 env
->spr
[SPR_SRR1
] = regs
.srr1
;
899 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
900 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
901 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
902 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
903 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
904 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
905 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
906 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
908 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
910 for (i
= 0;i
< 32; i
++)
911 env
->gpr
[i
] = regs
.gpr
[i
];
915 if (cap_booke_sregs
) {
916 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
921 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
922 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
923 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
924 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
925 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
926 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
927 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
928 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
929 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
930 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
931 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
932 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
935 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
936 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
937 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
938 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
939 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
940 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
943 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
944 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
947 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
948 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
951 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
952 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
953 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
954 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
955 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
956 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
957 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
958 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
959 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
960 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
961 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
962 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
963 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
964 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
965 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
966 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
967 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
969 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
970 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
971 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
972 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
975 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
976 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
979 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
980 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
981 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
985 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
986 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
987 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
988 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
989 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
990 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
991 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
992 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
993 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
994 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
995 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
998 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
999 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1002 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1003 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1004 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1007 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1008 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1009 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1010 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1012 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1013 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1014 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1020 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1025 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1029 for (i
= 0; i
< 64; i
++) {
1030 ppc_store_slb(env
, sregs
.u
.s
.ppc64
.slb
[i
].slbe
,
1031 sregs
.u
.s
.ppc64
.slb
[i
].slbv
);
1036 for (i
= 0; i
< 16; i
++) {
1037 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1041 for (i
= 0; i
< 8; i
++) {
1042 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1043 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1044 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1045 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1050 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1056 /* We deliberately ignore errors here, for kernels which have
1057 * the ONE_REG calls, but don't support the specific
1058 * registers, there's a reasonable chance things will still
1059 * work, at least until we try to migrate. */
1060 for (i
= 0; i
< 1024; i
++) {
1061 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1064 kvm_get_one_spr(cs
, id
, i
);
1070 if (kvm_get_vpa(cs
) < 0) {
1071 dprintf("Warning: Unable to get VPA information from KVM\n");
1080 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1082 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1084 if (irq
!= PPC_INTERRUPT_EXT
) {
1088 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1092 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
/* Select the external interrupt input pin matching the build target.
 * NOTE(review): the "#else" and "#endif" lines were missing from the damaged
 * listing and have been restored; confirm. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1105 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1107 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1108 CPUPPCState
*env
= &cpu
->env
;
1112 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1113 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1114 if (!cap_interrupt_level
&&
1115 run
->ready_for_interrupt_injection
&&
1116 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1117 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1119 /* For now KVM disregards the 'irq' argument. However, in the
1120 * future KVM could cache it in-kernel to avoid a heavyweight exit
1121 * when reading the UIC.
1123 irq
= KVM_INTERRUPT_SET
;
1125 dprintf("injected interrupt %d\n", irq
);
1126 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1128 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1131 /* Always wake up soon in case the interrupt was level based */
1132 qemu_mod_timer(idle_timer
, qemu_get_clock_ns(vm_clock
) +
1133 (get_ticks_per_sec() / 50));
1136 /* We don't know if there are more interrupts pending after this. However,
1137 * the guest will return to userspace in the course of handling this one
1138 * anyways, so we will get a chance to deliver the rest. */
1141 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1145 int kvm_arch_process_async_events(CPUState
*cs
)
1150 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1152 CPUState
*cs
= CPU(cpu
);
1153 CPUPPCState
*env
= &cpu
->env
;
1155 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1157 env
->exception_index
= EXCP_HLT
;
1163 /* map dcr access to existing qemu dcr emulation */
1164 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1166 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1167 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1172 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1174 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1175 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1180 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1182 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1183 CPUPPCState
*env
= &cpu
->env
;
1186 switch (run
->exit_reason
) {
1188 if (run
->dcr
.is_write
) {
1189 dprintf("handle dcr write\n");
1190 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1192 dprintf("handle dcr read\n");
1193 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1197 dprintf("handle halt\n");
1198 ret
= kvmppc_handle_halt(cpu
);
1200 #if defined(TARGET_PPC64)
1201 case KVM_EXIT_PAPR_HCALL
:
1202 dprintf("handle PAPR hypercall\n");
1203 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1205 run
->papr_hcall
.args
);
1210 dprintf("handle epr\n");
1211 run
->epr
.epr
= ldl_phys(env
->mpic_iack
);
1214 case KVM_EXIT_WATCHDOG
:
1215 dprintf("handle watchdog expiry\n");
1216 watchdog_perform_action();
1221 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1229 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1231 CPUState
*cs
= CPU(cpu
);
1232 uint32_t bits
= tsr_bits
;
1233 struct kvm_one_reg reg
= {
1234 .id
= KVM_REG_PPC_OR_TSR
,
1235 .addr
= (uintptr_t) &bits
,
1238 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1241 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1244 CPUState
*cs
= CPU(cpu
);
1245 uint32_t bits
= tsr_bits
;
1246 struct kvm_one_reg reg
= {
1247 .id
= KVM_REG_PPC_CLEAR_TSR
,
1248 .addr
= (uintptr_t) &bits
,
1251 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1254 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1256 CPUState
*cs
= CPU(cpu
);
1257 CPUPPCState
*env
= &cpu
->env
;
1258 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1260 struct kvm_one_reg reg
= {
1261 .id
= KVM_REG_PPC_TCR
,
1262 .addr
= (uintptr_t) &tcr
,
1265 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1268 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1270 CPUState
*cs
= CPU(cpu
);
1271 struct kvm_enable_cap encap
= {};
1274 if (!kvm_enabled()) {
1278 if (!cap_ppc_watchdog
) {
1279 printf("warning: KVM does not support watchdog");
1283 encap
.cap
= KVM_CAP_PPC_BOOKE_WATCHDOG
;
1284 ret
= kvm_vcpu_ioctl(cs
, KVM_ENABLE_CAP
, &encap
);
1286 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1287 __func__
, strerror(-ret
));
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy the
 * whole line into @value (at most @len bytes, always terminated).
 *
 * Returns 0 when a line was found, -1 otherwise (including when the file
 * cannot be opened).
 * NOTE(review): the declarations of f/ret/line, the fopen() failure check,
 * the loop construct, "ret = 0", the break statements, fclose() and the
 * final return were missing from the damaged listing and have been restored;
 * confirm.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
/*
 * Return the host timebase frequency parsed from the "timebase" line of
 * /proc/cpuinfo, or get_ticks_per_sec() when it cannot be determined.
 *
 * NOTE(review): the declarations of line/ns, the early returns and the
 * parsing of the number after ':' were missing from the damaged listing and
 * have been restored; confirm.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    /* Skip the colon; the number follows. */
    ns++;

    retval = atoi(ns);
    return retval;
}
1342 /* Try to find a device tree node for a CPU with clock-frequency property */
1343 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1345 struct dirent
*dirp
;
1348 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1349 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1354 while ((dirp
= readdir(dp
)) != NULL
) {
1356 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1358 f
= fopen(buf
, "r");
1360 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1367 if (buf
[0] == '\0') {
1368 printf("Unknown host!\n");
/*
 * Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit, big-endian).
 *
 * Returns the property value; (uint64_t)-1 when the CPU node or the property
 * file cannot be found or opened; 0 when the property has an unexpected
 * length.
 *
 * The strncat() bounds leave room for the terminating NUL: strncat() appends
 * up to n characters plus a terminator, so n must be the free space minus
 * one (the previous bound could overflow buf by one byte).
 * NOTE(review): the local declarations, the "return -1" statements, the
 * fopen() failure check, fclose(), the switch on the read length and the
 * final "return 0" were missing from the damaged listing and have been
 * restored; confirm.
 */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[512];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
    strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
/*
 * Return the "clock-frequency" property of the host CPU device-tree
 * node, exactly as produced by kvmppc_read_int_cpu_dt().
 */
1414 uint64_t kvmppc_get_clockfreq(void)
1416 return kvmppc_read_int_cpu_dt("clock-frequency");
/*
 * Return the "ibm,vmx" property of the host CPU device-tree node.
 *
 * The uint64_t result of kvmppc_read_int_cpu_dt() is narrowed to the
 * uint32_t return type.  kvmppc_host_cpu_class_init() uses the value as
 * a level: > 0 switches on PPC_ALTIVEC, > 1 switches on PPC2_VSX.
 */
1419 uint32_t kvmppc_get_vmx(void)
1421 return kvmppc_read_int_cpu_dt("ibm,vmx");
/*
 * Return the "ibm,dfp" property of the host CPU device-tree node.
 *
 * The uint64_t result of kvmppc_read_int_cpu_dt() is narrowed to the
 * uint32_t return type.  kvmppc_host_cpu_class_init() passes the value
 * as the on/off argument for the PPC2_DFP flag.
 */
1424 uint32_t kvmppc_get_dfp(void)
1426 return kvmppc_read_int_cpu_dt("ibm,dfp");
/*
 * Fill *pvinfo using the KVM_PPC_GET_PVINFO VM ioctl.
 *
 * @env:    CPU state; mapped to its CPUState through ppc_env_get_cpu()
 *          and CPU() to reach cs->kvm_state
 * @pvinfo: output structure handed to the ioctl
 *
 * Because of the short-circuiting &&, the ioctl is attempted only when
 * kvm_check_extension() reports KVM_CAP_PPC_GET_PVINFO.
 *
 * NOTE(review): the return statements are not visible in this extract.
 * Both callers test for a zero result before reading *pvinfo, so zero
 * presumably means success - confirm.
 */
1429 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1431 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1432 CPUState
*cs
= CPU(cpu
);
1434 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1435 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
/*
 * Check whether KVM_PPC_PVINFO_FLAGS_EV_IDLE is set in the pvinfo flags.
 *
 * Calls kvmppc_get_pvinfo() and, when that yields zero, tests
 * pvinfo.flags against KVM_PPC_PVINFO_FLAGS_EV_IDLE.
 *
 * NOTE(review): the return statements are not visible in this extract;
 * presumably non-zero when the flag is set and 0 otherwise - confirm.
 */
1442 int kvmppc_get_hasidle(CPUPPCState
*env
)
1444 struct kvm_ppc_pvinfo pvinfo
;
1446 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1447 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
/*
 * Copy the hypercall instruction bytes into @buf.
 *
 * @env:     CPU state forwarded to kvmppc_get_pvinfo()
 * @buf:     destination; also aliased as a uint32_t pointer (hc)
 * @buf_len: number of bytes copied by memcpy()
 *
 * When kvmppc_get_pvinfo() yields zero, @buf_len bytes of pvinfo.hcall
 * are copied to @buf.  Otherwise a fallback described as "always fail
 * hypercalls" is used.
 *
 * NOTE(review): no check that @buf_len fits within pvinfo.hcall is
 * visible before the memcpy() - confirm callers never pass more than
 * sizeof(pvinfo.hcall).
 * NOTE(review): the fallback stores through hc and the return
 * statements are not visible in this extract.
 */
1454 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1456 uint32_t *hc
= (uint32_t*)buf
;
1457 struct kvm_ppc_pvinfo pvinfo
;
1459 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1460 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1465 * Fallback to always fail hypercalls:
/*
 * Enable the KVM_CAP_PPC_PAPR capability on @cpu's vcpu.
 *
 * cap.cap is set to KVM_CAP_PPC_PAPR and handed to the KVM_ENABLE_CAP
 * vcpu ioctl.  cpu_abort() is called with "This KVM version does not
 * support PAPR" on the failure path.
 *
 * NOTE(review): the condition guarding cpu_abort() and the capability
 * flag update announced by the trailing comment are not visible in this
 * extract; presumably the abort is taken when ret is non-zero - confirm.
 */
1481 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1483 CPUPPCState
*env
= &cpu
->env
;
1484 CPUState
*cs
= CPU(cpu
);
1485 struct kvm_enable_cap cap
= {};
1488 cap
.cap
= KVM_CAP_PPC_PAPR
;
1489 ret
= kvm_vcpu_ioctl(cs
, KVM_ENABLE_CAP
, &cap
);
1492 cpu_abort(env
, "This KVM version does not support PAPR\n");
1495 /* Update the capability flag so we sync the right information
/*
 * Set the KVM_CAP_PPC_EPR capability argument for @cpu's vcpu.
 *
 * @cpu:        vcpu to configure
 * @mpic_proxy: value stored in cap.args[0] for the KVM_ENABLE_CAP ioctl
 *
 * A failing ioctl is fatal (cpu_abort) only when @mpic_proxy is
 * non-zero; in the lines visible here a failure while passing 0 is
 * ignored.
 */
1500 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1502 CPUPPCState
*env
= &cpu
->env
;
1503 CPUState
*cs
= CPU(cpu
);
1504 struct kvm_enable_cap cap
= {};
1507 cap
.cap
= KVM_CAP_PPC_EPR
;
1508 cap
.args
[0] = mpic_proxy
;
1509 ret
= kvm_vcpu_ioctl(cs
, KVM_ENABLE_CAP
, &cap
);
1511 if (ret
&& mpic_proxy
) {
1512 cpu_abort(env
, "This KVM version does not support EPR\n");
/*
 * Return the file-scope cap_ppc_smt value, or 1 when that value is 0,
 * so callers never see a thread count of zero.
 */
1516 int kvmppc_smt_threads(void)
1518 return cap_ppc_smt
? cap_ppc_smt
: 1;
/*
 * Allocate an RMA through KVM and install it as RAM at offset 0 of
 * @sysmem.
 *
 * @name:   name given to the new memory region
 * @sysmem: container region that receives the RMA as a subregion
 *
 * The KVM_ALLOCATE_RMA VM ioctl yields a file descriptor and fills
 * ret.rma_size.  The mapping size is the smaller of ret.rma_size and
 * 256ul << 20 (256 MiB).  The descriptor is mmap()ed shared and
 * read/write, wrapped in a freshly allocated MemoryRegion with
 * memory_region_init_ram_ptr(), registered with
 * vmstate_register_ram_global() and added to @sysmem at offset 0.
 *
 * NOTE(review): the body of the cap_ppc_rma < 2 branch, the conditions
 * guarding the error messages, any close(fd) on the mmap() failure path
 * and the return statements are not visible in this extract - confirm.
 */
1522 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1527 struct kvm_allocate_rma ret
;
1528 MemoryRegion
*rma_region
;
1530 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1531 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1532 * not necessary on this hardware
1533 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1535 * FIXME: We should allow the user to force contiguous RMA
1536 * allocation in the cap_ppc_rma==1 case.
1538 if (cap_ppc_rma
< 2) {
1542 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1544 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1549 size
= MIN(ret
.rma_size
, 256ul << 20);
1551 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1552 if (rma
== MAP_FAILED
) {
1553 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1557 rma_region
= g_new(MemoryRegion
, 1);
1558 memory_region_init_ram_ptr(rma_region
, name
, size
, rma
);
1559 vmstate_register_ram_global(rma_region
);
1560 memory_region_add_subregion(sysmem
, 0, rma_region
);
/*
 * Clamp a proposed RMA size to what the host page sizes allow.
 *
 * @current_size: proposed size; also the upper bound of the result
 * @hash_shift:   added to the chosen page shift when computing the limit
 *
 * When cap_ppc_rma >= 2 @current_size is returned unchanged.  Otherwise
 * the info.sps[] table obtained from kvm_get_smmu_info() is scanned for
 * the largest page_shift whose page size (1UL << page_shift) does not
 * exceed getrampagesize(), and the result is
 * MIN(current_size, 1ULL << (best_page_shift + hash_shift - 7)).
 *
 * NOTE(review): if no table entry qualifies, best_page_shift stays 0
 * and the shift count becomes hash_shift - 7; confirm that callers
 * always pass hash_shift >= 7.
 * NOTE(review): the handling of entries with a zero page_shift is not
 * visible in this extract; presumably they are skipped - confirm.
 */
1565 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1567 struct kvm_ppc_smmu_info info
;
1568 long rampagesize
, best_page_shift
;
1571 if (cap_ppc_rma
>= 2) {
1572 return current_size
;
1575 /* Find the largest hardware supported page size that's less than
1576 * or equal to the (logical) backing page size of guest RAM */
1577 kvm_get_smmu_info(ppc_env_get_cpu(first_cpu
), &info
);
1578 rampagesize
= getrampagesize();
1579 best_page_shift
= 0;
1581 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1582 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
1584 if (!sps
->page_shift
) {
1588 if ((sps
->page_shift
> best_page_shift
)
1589 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1590 best_page_shift
= sps
->page_shift
;
1594 return MIN(current_size
,
1595 1ULL << (best_page_shift
+ hash_shift
- 7));
/*
 * Create a TCE table through KVM and map it into this process.
 *
 * @liobn:       bus number used in the error messages
 * @window_size: window size stored in args.window_size; also determines
 *               the mapping length
 * @pfd:         out parameter for the table's file descriptor
 *
 * The function first tests the file-scope cap_spapr_tce flag.  The
 * KVM_CREATE_SPAPR_TCE VM ioctl yields a file descriptor, which is
 * mmap()ed shared and read/write over
 * (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE) bytes.
 *
 * NOTE(review): the remaining args initialisers, the stores to *pfd,
 * the conditions guarding the error messages and the return statements
 * are not visible in this extract - confirm.
 */
1599 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
)
1601 struct kvm_create_spapr_tce args
= {
1603 .window_size
= window_size
,
1609 /* Must set fd to -1 so we don't try to munmap when called for
1610 * destroying the table, which the upper layers -will- do
1613 if (!cap_spapr_tce
) {
1617 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1619 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1624 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(sPAPRTCE
);
1625 /* FIXME: round this up to page size */
1627 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1628 if (table
== MAP_FAILED
) {
1629 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
/*
 * Unmap a TCE table previously mapped over the same window size.
 *
 * @table:       mapping to release with munmap()
 * @fd:          descriptor associated with the table
 * @window_size: window size; the unmapped length is
 *               (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE)
 *
 * A failing munmap() is reported on stderr and, per the in-body
 * comment, the table is then deliberately leaked.
 *
 * NOTE(review): the second operand of the || test, any early return
 * and the return statements are not visible in this extract.
 * NOTE(review): the error format string has no trailing newline.
 */
1639 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t window_size
)
1647 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
)*sizeof(sPAPRTCE
);
1648 if ((munmap(table
, len
) < 0) ||
1650 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1652 /* Leak the table */
/*
 * Ask KVM to allocate or reset the hash page table.
 *
 * @shift_hint: copied into shift, which is passed by address to the
 *              KVM_PPC_ALLOCATE_HTAB VM ioctl
 *
 * Paths visible here, in order:
 *  - KVM not enabled: the caller is told to allocate the table itself.
 *  - KVM_CAP_PPC_ALLOC_HTAB present: the ioctl is issued; -ENOTTY is
 *    treated as "allocate in qemu", any other negative result as an
 *    error.
 *  - Otherwise KVM_CAP_PPC_GET_PVINFO distinguishes PR (caller
 *    allocates) from HV (a 16MB kernel-allocated table is assumed).
 *
 * NOTE(review): none of the return statements are visible in this
 * extract; the values returned on each path must be confirmed against
 * the in-body comments.
 */
1658 int kvmppc_reset_htab(int shift_hint
)
1660 uint32_t shift
= shift_hint
;
1662 if (!kvm_enabled()) {
1663 /* Full emulation, tell caller to allocate htab itself */
1666 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1668 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1669 if (ret
== -ENOTTY
) {
1670 /* At least some versions of PR KVM advertise the
1671 * capability, but don't implement the ioctl(). Oops.
1672 * Return 0 so that we allocate the htab in qemu, as is
1673 * correct for PR. */
1675 } else if (ret
< 0) {
1681 /* We have a kernel that predates the htab reset calls. For PR
1682 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1683 * this era, it has allocated a 16MB fixed size hash table
1684 * already. Kernels of this era have the GET_PVINFO capability
1685 * only on PR, so we use this hack to determine the right
1687 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1688 /* PR - tell caller to allocate htab */
1691 /* HV - assume 16MB kernel allocated htab */
/*
 * Return a 32-bit value that kvm_ppc_register_host_cpu_type() stores as
 * host_pvr and passes to ppc_cpu_class_by_pvr().
 *
 * NOTE(review): the body is not visible in this extract; presumably it
 * reads the host processor version register - confirm.
 */
1696 static inline uint32_t mfpvr(void)
/*
 * Update the flag word at @word for the bits given in @flags.
 *
 * @word:  flag word to modify; callers pass &pcc->insns_flags or
 *         &pcc->insns_flags2
 * @flags: bit mask such as PPC_ALTIVEC, PPC2_VSX or PPC2_DFP
 * @on:    selects the direction of the change
 *
 * NOTE(review): the body is not visible in this extract; presumably the
 * bits in @flags are set when @on is true and cleared otherwise -
 * confirm.
 */
1705 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
/*
 * Instance-init callback installed as .instance_init by
 * kvm_ppc_register_host_cpu_type().  The only statement visible here
 * asserts that KVM is enabled; @obj is not used in the visible lines.
 */
1714 static void kvmppc_host_cpu_initfn(Object
*obj
)
1716 assert(kvm_enabled());
/*
 * Class-init callback installed as .class_init by
 * kvm_ppc_register_host_cpu_type(); adjusts the PowerPCCPUClass with
 * values queried from the host.
 *
 * @oc:   class being initialised, cast with POWERPC_CPU_CLASS()
 * @data: unused in the lines visible here
 *
 * - vmx (from kvmppc_get_vmx()): > 0 controls PPC_ALTIVEC in
 *   insns_flags, > 1 controls PPC2_VSX in insns_flags2.
 * - dfp (from kvmppc_get_dfp()): controls PPC2_DFP in insns_flags2.
 * - "d-cache-size" / "i-cache-size": copied to l1_dcache_size /
 *   l1_icache_size unless the value equals -1.
 *
 * NOTE(review): the conditions guarding the alter_insns() calls ("only
 * override when we know what the host supports") are not visible in
 * this extract.
 * NOTE(review): the cache sizes are uint32_t compared against -1, while
 * the header comment of kvmppc_read_int_cpu_dt() says it returns 0 when
 * anything goes wrong; if that is accurate a failed read would store a
 * size of 0 - confirm the helper's failure sentinel.
 */
1719 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1721 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1722 uint32_t vmx
= kvmppc_get_vmx();
1723 uint32_t dfp
= kvmppc_get_dfp();
1724 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1725 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1727 /* Now fix up the class with information we can query from the host */
1730 /* Only override when we know what the host supports */
1731 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1732 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1735 /* Only override when we know what the host supports */
1736 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
1739 if (dcache_size
!= -1) {
1740 pcc
->l1_dcache_size
= dcache_size
;
1743 if (icache_size
!= -1) {
1744 pcc
->l1_icache_size
= icache_size
;
/*
 * Renumber @cpu's cpu_index for the SMT thread count reported by
 * kvmppc_smt_threads().
 *
 * new index = (old / smp_threads) * smt + (old % smp_threads)
 *
 * i.e. the group of smp_threads indexes the old value falls in is
 * re-spaced to a stride of smt, and the offset inside the group is
 * kept.
 *
 * NOTE(review): the return statement is not visible in this extract.
 */
1748 int kvmppc_fixup_cpu(PowerPCCPU
*cpu
)
1750 CPUState
*cs
= CPU(cpu
);
1753 /* Adjust cpu index for SMT */
1754 smt
= kvmppc_smt_threads();
1755 cs
->cpu_index
= (cs
->cpu_index
/ smp_threads
) * smt
1756 + (cs
->cpu_index
% smp_threads
);
/*
 * Report, as a bool, whether the EPR capability is available.
 *
 * NOTE(review): the body is not visible in this extract; presumably it
 * returns a file-scope capability flag - confirm.
 */
1761 bool kvmppc_has_cap_epr(void)
/*
 * Register the TYPE_HOST_POWERPC_CPU type.
 *
 * The TypeInfo uses kvmppc_host_cpu_initfn() as instance_init and
 * kvmppc_host_cpu_class_init() as class_init.  Its parent is the name
 * of the class that ppc_cpu_class_by_pvr() returns for the value read
 * by mfpvr(), so the host type derives from the CPU model matching the
 * host.
 *
 * NOTE(review): the handling of a NULL pvr_pcc and the return
 * statements are not visible in this extract; presumably registration
 * is skipped with an error return when no class matches - confirm.
 */
1766 static int kvm_ppc_register_host_cpu_type(void)
1768 TypeInfo type_info
= {
1769 .name
= TYPE_HOST_POWERPC_CPU
,
1770 .instance_init
= kvmppc_host_cpu_initfn
,
1771 .class_init
= kvmppc_host_cpu_class_init
,
1773 uint32_t host_pvr
= mfpvr();
1774 PowerPCCPUClass
*pvr_pcc
;
1776 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1777 if (pvr_pcc
== NULL
) {
1780 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1781 type_register(&type_info
);
/*
 * KVM arch hook taking the affected CPUState and returning a bool.
 *
 * NOTE(review): the body is not visible in this extract; confirm the
 * value returned for PowerPC.
 */
1786 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
/*
 * KVM arch hook for a SIGBUS associated with a vcpu.
 *
 * @cpu:  CPUState the signal is attributed to
 * @code: signal code
 * @addr: address associated with the signal
 *
 * NOTE(review): the body is not visible in this extract; confirm the
 * value returned for PowerPC.
 */
1791 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
/*
 * KVM arch hook for a SIGBUS that is not tied to a CPUState argument.
 *
 * @code: signal code
 * @addr: address associated with the signal
 *
 * NOTE(review): the body is not visible in this extract; confirm the
 * value returned for PowerPC.
 */
1796 int kvm_arch_on_sigbus(int code
, void *addr
)
1801 void kvm_arch_init_irq_routing(KVMState
*s
)