/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
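/*
 * The cap_* flags above cache the results of KVM_CHECK_EXTENSION probes made
 * once in kvm_arch_init(), so the rest of this file can test plain integers
 * instead of issuing an ioctl on every use.
 */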
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
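/* Armed in kvm_arch_pre_run() for get_ticks_per_sec() / 50, i.e. the ~20 ms
 * wakeup described above, each time an interrupt is injected. */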
static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}
static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
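/*
 * With KVM_CAP_SW_TLB enabled, cfg.array points at QEMU's own TLB storage
 * (env->tlb.tlbm), so kernel and userspace share a single copy of the guest
 * TLB; kvm_sw_tlb_put() below pushes QEMU-side modifications back to KVM by
 * marking every entry dirty.
 */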
#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR KVM: no backing store restrictions */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
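/*
 * KVM_PPC_PAGE_SIZES_REAL (set by HV KVM in the fallback above) means a page
 * size is only usable if it fits within the page size backing guest RAM;
 * without that flag, any advertised page size is accepted.
 */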
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int ik, iq, jk, jq;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
}
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}
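/*
 * The two helpers above use the generic ONE_REG interface: the 64-bit
 * register id encodes the register's width (KVM_REG_SIZE_U32/U64), which is
 * why both directions switch on id & KVM_REG_SIZE_MASK before copying.
 */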
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            if (env->slb[i].esid & SLB_ESID_V) {
                sregs.u.s.ppc64.slb[i].slbe |= i;
            }
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
        }
        for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
        }
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
        kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);
    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
        }

        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
        }

        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }
    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (!env->external_htab) {
            ppc_store_sdr1(env, sregs.u.s.sdr1);
        }

        /* Sync SLB */
#ifdef TARGET_PPC64
        /*
         * The packed SLB array we get from KVM_GET_SREGS only contains
         * information about valid entries. So we flush our internal
         * copy to get rid of stale ones, then put all valid SLB entries
         * back in.
         */
        memset(env->slb, 0, sizeof(env->slb));
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
            target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
            /*
             * Only restore valid entries
             */
            if (rb & SLB_ESID_V) {
                ppc_store_slb(env, rb, rs);
            }
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }
    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
        }
        for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
        }
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
        kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return 0;
}
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}
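/*
 * Two injection schemes are used for the external interrupt: with
 * cap_interrupt_level, kvmppc_set_interrupt() asserts/deasserts it as a
 * level via KVM_INTERRUPT_SET_LEVEL/KVM_INTERRUPT_UNSET; without it,
 * kvm_arch_pre_run() below falls back to pulsing KVM_INTERRUPT_SET and
 * re-arming idle_timer so a swallowed interrupt gets re-injected.
 */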
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}
/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;

        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}
int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 0;
}
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        cpu_abort(cs, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
}
void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        cpu_abort(cs, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
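/* cap_ppc_smt holds the KVM_CAP_PPC_SMT value reported by the kernel,
 * presumably the number of hardware threads per core exposed by HV KVM;
 * PR KVM reports 0, hence the fallback to 1 above. */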
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
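/* The "best_page_shift + hash_shift - 7" bound above appears to keep the RMA
 * small enough that a hash table of 2^hash_shift bytes provides at least one
 * 128-byte PTEG (2^7) per base page of RMA. */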
bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;

        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}
static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;
    DeviceClass *dc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

    /* Register generic family CPU class for a family */
    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
    dc = DEVICE_CLASS(pvr_pcc);
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
    type_register(&type_info);

    return 0;
}
int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}
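/*
 * kvmppc_save_htab() below streams the hash table from that fd: the kernel
 * returns kvm_get_htab_header chunks already in big-endian, a read() of 0
 * bytes means the whole table has been transferred (return value 1), and
 * max_ns bounds how long a single call may keep reading.
 */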
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Kernel already returns data in BE format for the file */
            qemu_put_buffer(f, buf, rc);
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    /* This is KVM on ppc, so this is all big-endian */
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
}

void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
}
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};

uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}
void kvmppc_hash64_free_pteg(uint64_t token)
{
    struct kvm_get_htab_buf *htab_buf;

    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
                            hpte);
    g_free(htab_buf);
    return;
}
*env
, target_ulong pte_index
,
2050 target_ulong pte0
, target_ulong pte1
)
2053 struct kvm_get_htab_fd ghf
;
2054 struct kvm_get_htab_buf hpte_buf
;
2057 ghf
.start_index
= 0; /* Ignored */
2058 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2063 hpte_buf
.header
.n_valid
= 1;
2064 hpte_buf
.header
.n_invalid
= 0;
2065 hpte_buf
.header
.index
= pte_index
;
2066 hpte_buf
.hpte
[0] = pte0
;
2067 hpte_buf
.hpte
[1] = pte1
;
2069 * Write the hpte entry.
2070 * CAUTION: write() has the warn_unused_result attribute. Hence we
2071 * need to check the return value, even though we do nothing.
2073 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {