/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "sysemu/watchdog.h"
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}
static int kvm_ppc_register_host_cpu_type(void);
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
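/* Note on the mechanism above (explanatory, not part of the original): once
 * KVM_ENABLE_CAP(KVM_CAP_SW_TLB) succeeds, env->tlb.tlbm is shared with the
 * kernel. Setting env->tlb_dirty makes the next kvm_arch_put_registers()
 * push the whole array to KVM via KVM_DIRTY_TLB; see kvm_sw_tlb_put()
 * below. */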
#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
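/* Illustrative examples (not part of the original): for PR KVM the
 * KVM_PPC_PAGE_SIZES_REAL flag is clear, so any shift is accepted. For HV
 * KVM with 16M hugepage backing (rampgsize == 1 << 24), shift == 24 (16M
 * pages) passes, while shift == 34 (16G pages) is rejected. */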
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}
void kvm_arch_reset_vcpu(CPUState *cpu)
{
}
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
                spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}
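/* Explanatory note (not part of the original): the KVM_REG_PPC_* ids encode
 * their own transfer width, so masking with KVM_REG_SIZE_MASK lets one
 * helper service both 32-bit and 64-bit SPRs; kvm_put_one_spr() below does
 * the mirror-image conversion. */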
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
                spr, strerror(errno));
    }
}
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
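/* Layout note (explanatory, not part of the original): each VSX register is
 * transferred as two doublewords, vsr[0] being the FPR part (env->fpr[i])
 * and vsr[1] the VSX extension (env->vsr[i]); KVM_REG_PPC_VSR(i) moves both
 * halves, while KVM_REG_PPC_FPR(i) moves only the first. */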
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }
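    /* Illustrative example (not part of the original): with env->crf[0] == 0x8
     * (CR0 "lt" bit set) and all other fields zero, the loop above yields
     * regs.cr == 0x80000000, i.e. crf[0] occupies the most significant
     * nibble, matching the hardware CR layout. */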
    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                dprintf("Warning: Unable to set VPA information to KVM\n");
            }
        }
#endif /* TARGET_PPC64 */
    }

    return ret;
}
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                dprintf("Warning: Unable to get VPA information from KVM\n");
            }
        }
#endif
    }

    return 0;
}
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}
/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        dprintf("handle epr\n");
        run->epr.epr = ldl_phys(env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        dprintf("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}
int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}
int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}
int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap encap = {};
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
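/* For reference (typical value, assumed rather than taken from this file): a
 * POWER host's /proc/cpuinfo carries a line like "timebase : 512000000",
 * which the parsing above turns into a 512 MHz timebase frequency. */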
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit).  Returns -1 if the property can't be
 * found or opened, and 0 if the format isn't understood. */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return -1;
}
int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}
void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}
int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(ppc_env_get_cpu(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
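/* Worked example (not part of the original): with 4K backing pages
 * (best_page_shift == 12) and a 16MB hash table (hash_shift == 24), the cap
 * is 1ULL << (12 + 24 - 7) == 512MB; the "- 7" budgets 128 bytes of hash
 * table per base-size page of RMA. */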
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
1696 /* HV - assume 16MB kernel allocated htab */
1701 static inline uint32_t mfpvr(void)
1710 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
1719 static void kvmppc_host_cpu_initfn(Object
*obj
)
1721 assert(kvm_enabled());
1724 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1726 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1727 uint32_t vmx
= kvmppc_get_vmx();
1728 uint32_t dfp
= kvmppc_get_dfp();
1729 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1730 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1732 /* Now fix up the class with information we can query from the host */
1735 /* Only override when we know what the host supports */
1736 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1737 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1740 /* Only override when we know what the host supports */
1741 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
1744 if (dcache_size
!= -1) {
1745 pcc
->l1_dcache_size
= dcache_size
;
1748 if (icache_size
!= -1) {
1749 pcc
->l1_icache_size
= icache_size
;
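/* Background (device tree convention, assumed rather than stated in this
 * file): the "ibm,vmx" property is a level, where 1 advertises AltiVec and
 * 2 advertises AltiVec plus VSX, hence the vmx > 0 and vmx > 1 tests
 * above. */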
int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
        + (cs->cpu_index % smp_threads);

    return 0;
}
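/* Illustrative example (not part of the original): with smp_threads == 2 and
 * a host reporting smt == 4, guest vcpu 3 (core 1, thread 1) is remapped to
 * (3 / 2) * 4 + (3 % 2) == 5, i.e. thread 1 on the second physical core. */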
bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    return 0;
}
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}