/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
/* #define DEBUG_KVM */

/* Debug tracing: compiled to a no-op unless DEBUG_KVM is defined above.
 * (The extraction had dropped the #ifdef guard, leaving DPRINTF defined
 * twice, which is a preprocessor redefinition error.) */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
58 static int cap_interrupt_unset
= false;
59 static int cap_interrupt_level
= false;
60 static int cap_segstate
;
61 static int cap_booke_sregs
;
62 static int cap_ppc_smt
;
63 static int cap_ppc_rma
;
64 static int cap_spapr_tce
;
65 static int cap_spapr_multitce
;
66 static int cap_spapr_vfio
;
68 static int cap_one_reg
;
70 static int cap_ppc_watchdog
;
72 static int cap_htab_fd
;
73 static int cap_fixup_hcalls
;
75 /* XXX We have a race condition where we actually have a level triggered
76 * interrupt, but the infrastructure can't expose that yet, so the guest
77 * takes but ignores it, goes to sleep and never gets notified that there's
78 * still an interrupt pending.
80 * As a quick workaround, let's just wake up again 20 ms after we injected
81 * an interrupt. That way we can assure that we're always reinjecting
82 * interrupts in case the guest swallowed them.
84 static QEMUTimer
*idle_timer
;
86 static void kvm_kick_cpu(void *opaque
)
88 PowerPCCPU
*cpu
= opaque
;
90 qemu_cpu_kick(CPU(cpu
));
93 static int kvm_ppc_register_host_cpu_type(void);
95 int kvm_arch_init(KVMState
*s
)
97 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
98 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
99 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
100 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
101 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
102 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
103 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
104 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
105 cap_spapr_vfio
= false;
106 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
107 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
108 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
109 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
110 /* Note: we don't set cap_papr here, because this capability is
111 * only activated after this by kvmppc_set_papr() */
112 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
113 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
115 if (!cap_interrupt_level
) {
116 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
117 "VM to stall at times!\n");
120 kvm_ppc_register_host_cpu_type();
125 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
127 CPUPPCState
*cenv
= &cpu
->env
;
128 CPUState
*cs
= CPU(cpu
);
129 struct kvm_sregs sregs
;
132 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
133 /* What we're really trying to say is "if we're on BookE, we use
134 the native PVR for now". This is the only sane way to check
135 it though, so we potentially confuse users that they can run
136 BookE guests on BookS. Let's hope nobody dares enough :) */
140 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
145 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
150 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
151 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
154 /* Set up a shared TLB array with KVM */
155 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
157 CPUPPCState
*env
= &cpu
->env
;
158 CPUState
*cs
= CPU(cpu
);
159 struct kvm_book3e_206_tlb_params params
= {};
160 struct kvm_config_tlb cfg
= {};
161 unsigned int entries
= 0;
164 if (!kvm_enabled() ||
165 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
169 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
171 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
172 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
173 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
174 entries
+= params
.tlb_sizes
[i
];
177 assert(entries
== env
->nb_tlb
);
178 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
180 env
->tlb_dirty
= true;
182 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
183 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
184 cfg
.params
= (uintptr_t)¶ms
;
185 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
187 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
189 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
190 __func__
, strerror(-ret
));
194 env
->kvm_sw_tlb
= true;
199 #if defined(TARGET_PPC64)
200 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
201 struct kvm_ppc_smmu_info
*info
)
203 CPUPPCState
*env
= &cpu
->env
;
204 CPUState
*cs
= CPU(cpu
);
206 memset(info
, 0, sizeof(*info
));
208 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
209 * need to "guess" what the supported page sizes are.
211 * For that to work we make a few assumptions:
213 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
214 * KVM which only supports 4K and 16M pages, but supports them
215 * regardless of the backing store characteritics. We also don't
216 * support 1T segments.
218 * This is safe as if HV KVM ever supports that capability or PR
219 * KVM grows supports for more page/segment sizes, those versions
220 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
221 * will not hit this fallback
223 * - Else we are running HV KVM. This means we only support page
224 * sizes that fit in the backing store. Additionally we only
225 * advertize 64K pages if the processor is ARCH 2.06 and we assume
226 * P7 encodings for the SLB and hash table. Here too, we assume
227 * support for any newer processor will mean a kernel that
228 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
231 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
236 /* Standard 4k base page size segment */
237 info
->sps
[0].page_shift
= 12;
238 info
->sps
[0].slb_enc
= 0;
239 info
->sps
[0].enc
[0].page_shift
= 12;
240 info
->sps
[0].enc
[0].pte_enc
= 0;
242 /* Standard 16M large page size segment */
243 info
->sps
[1].page_shift
= 24;
244 info
->sps
[1].slb_enc
= SLB_VSID_L
;
245 info
->sps
[1].enc
[0].page_shift
= 24;
246 info
->sps
[1].enc
[0].pte_enc
= 0;
250 /* HV KVM has backing store size restrictions */
251 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
253 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
254 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
257 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
263 /* Standard 4k base page size segment */
264 info
->sps
[i
].page_shift
= 12;
265 info
->sps
[i
].slb_enc
= 0;
266 info
->sps
[i
].enc
[0].page_shift
= 12;
267 info
->sps
[i
].enc
[0].pte_enc
= 0;
270 /* 64K on MMU 2.06 */
271 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
272 info
->sps
[i
].page_shift
= 16;
273 info
->sps
[i
].slb_enc
= 0x110;
274 info
->sps
[i
].enc
[0].page_shift
= 16;
275 info
->sps
[i
].enc
[0].pte_enc
= 1;
279 /* Standard 16M large page size segment */
280 info
->sps
[i
].page_shift
= 24;
281 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
282 info
->sps
[i
].enc
[0].page_shift
= 24;
283 info
->sps
[i
].enc
[0].pte_enc
= 0;
287 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
289 CPUState
*cs
= CPU(cpu
);
292 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
293 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
299 kvm_get_fallback_smmu_info(cpu
, info
);
302 static long getrampagesize(void)
308 /* guest RAM is backed by normal anonymous pages */
309 return getpagesize();
313 ret
= statfs(mem_path
, &fs
);
314 } while (ret
!= 0 && errno
== EINTR
);
317 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
322 #define HUGETLBFS_MAGIC 0x958458f6
324 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
325 /* Explicit mempath, but it's ordinary pages */
326 return getpagesize();
329 /* It's hugepage, return the huge page size */
333 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
335 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
339 return (1ul << shift
) <= rampgsize
;
342 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
344 static struct kvm_ppc_smmu_info smmu_info
;
345 static bool has_smmu_info
;
346 CPUPPCState
*env
= &cpu
->env
;
350 /* We only handle page sizes for 64-bit server guests for now */
351 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
355 /* Collect MMU info from kernel if not already */
356 if (!has_smmu_info
) {
357 kvm_get_smmu_info(cpu
, &smmu_info
);
358 has_smmu_info
= true;
361 rampagesize
= getrampagesize();
363 /* Convert to QEMU form */
364 memset(&env
->sps
, 0, sizeof(env
->sps
));
367 * XXX This loop should be an entry wide AND of the capabilities that
368 * the selected CPU has with the capabilities that KVM supports.
370 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
371 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
372 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
374 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
378 qsps
->page_shift
= ksps
->page_shift
;
379 qsps
->slb_enc
= ksps
->slb_enc
;
380 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
381 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
382 ksps
->enc
[jk
].page_shift
)) {
385 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
386 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
387 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
391 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
395 env
->slb_nr
= smmu_info
.slb_size
;
396 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
397 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
400 #else /* defined (TARGET_PPC64) */
402 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
406 #endif /* !defined (TARGET_PPC64) */
408 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
410 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
413 int kvm_arch_init_vcpu(CPUState
*cs
)
415 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
416 CPUPPCState
*cenv
= &cpu
->env
;
419 /* Gather server mmu info from KVM and update the CPU state */
420 kvm_fixup_page_sizes(cpu
);
422 /* Synchronize sregs with kvm */
423 ret
= kvm_arch_sync_sregs(cpu
);
428 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
430 /* Some targets support access to KVM's guest TLB. */
431 switch (cenv
->mmu_model
) {
432 case POWERPC_MMU_BOOKE206
:
433 ret
= kvm_booke206_tlb_init(cpu
);
442 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
444 CPUPPCState
*env
= &cpu
->env
;
445 CPUState
*cs
= CPU(cpu
);
446 struct kvm_dirty_tlb dirty_tlb
;
447 unsigned char *bitmap
;
450 if (!env
->kvm_sw_tlb
) {
454 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
455 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
457 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
458 dirty_tlb
.num_dirty
= env
->nb_tlb
;
460 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
462 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
463 __func__
, strerror(-ret
));
469 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
471 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
472 CPUPPCState
*env
= &cpu
->env
;
477 struct kvm_one_reg reg
= {
479 .addr
= (uintptr_t) &val
,
483 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
485 trace_kvm_failed_spr_get(spr
, strerror(errno
));
487 switch (id
& KVM_REG_SIZE_MASK
) {
488 case KVM_REG_SIZE_U32
:
489 env
->spr
[spr
] = val
.u32
;
492 case KVM_REG_SIZE_U64
:
493 env
->spr
[spr
] = val
.u64
;
497 /* Don't handle this size yet */
503 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
505 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
506 CPUPPCState
*env
= &cpu
->env
;
511 struct kvm_one_reg reg
= {
513 .addr
= (uintptr_t) &val
,
517 switch (id
& KVM_REG_SIZE_MASK
) {
518 case KVM_REG_SIZE_U32
:
519 val
.u32
= env
->spr
[spr
];
522 case KVM_REG_SIZE_U64
:
523 val
.u64
= env
->spr
[spr
];
527 /* Don't handle this size yet */
531 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
533 trace_kvm_failed_spr_set(spr
, strerror(errno
));
537 static int kvm_put_fp(CPUState
*cs
)
539 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
540 CPUPPCState
*env
= &cpu
->env
;
541 struct kvm_one_reg reg
;
545 if (env
->insns_flags
& PPC_FLOAT
) {
546 uint64_t fpscr
= env
->fpscr
;
547 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
549 reg
.id
= KVM_REG_PPC_FPSCR
;
550 reg
.addr
= (uintptr_t)&fpscr
;
551 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
553 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
557 for (i
= 0; i
< 32; i
++) {
560 vsr
[0] = float64_val(env
->fpr
[i
]);
561 vsr
[1] = env
->vsr
[i
];
562 reg
.addr
= (uintptr_t) &vsr
;
563 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
565 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
567 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
574 if (env
->insns_flags
& PPC_ALTIVEC
) {
575 reg
.id
= KVM_REG_PPC_VSCR
;
576 reg
.addr
= (uintptr_t)&env
->vscr
;
577 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
579 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
583 for (i
= 0; i
< 32; i
++) {
584 reg
.id
= KVM_REG_PPC_VR(i
);
585 reg
.addr
= (uintptr_t)&env
->avr
[i
];
586 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
588 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
597 static int kvm_get_fp(CPUState
*cs
)
599 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
600 CPUPPCState
*env
= &cpu
->env
;
601 struct kvm_one_reg reg
;
605 if (env
->insns_flags
& PPC_FLOAT
) {
607 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
609 reg
.id
= KVM_REG_PPC_FPSCR
;
610 reg
.addr
= (uintptr_t)&fpscr
;
611 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
613 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
619 for (i
= 0; i
< 32; i
++) {
622 reg
.addr
= (uintptr_t) &vsr
;
623 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
625 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
627 DPRINTF("Unable to get %s%d from KVM: %s\n",
628 vsx
? "VSR" : "FPR", i
, strerror(errno
));
631 env
->fpr
[i
] = vsr
[0];
633 env
->vsr
[i
] = vsr
[1];
639 if (env
->insns_flags
& PPC_ALTIVEC
) {
640 reg
.id
= KVM_REG_PPC_VSCR
;
641 reg
.addr
= (uintptr_t)&env
->vscr
;
642 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
644 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
648 for (i
= 0; i
< 32; i
++) {
649 reg
.id
= KVM_REG_PPC_VR(i
);
650 reg
.addr
= (uintptr_t)&env
->avr
[i
];
651 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
653 DPRINTF("Unable to get VR%d from KVM: %s\n",
663 #if defined(TARGET_PPC64)
664 static int kvm_get_vpa(CPUState
*cs
)
666 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
667 CPUPPCState
*env
= &cpu
->env
;
668 struct kvm_one_reg reg
;
671 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
672 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
673 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
675 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
679 assert((uintptr_t)&env
->slb_shadow_size
680 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
681 reg
.id
= KVM_REG_PPC_VPA_SLB
;
682 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
683 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
685 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
690 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
691 reg
.id
= KVM_REG_PPC_VPA_DTL
;
692 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
693 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
695 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
703 static int kvm_put_vpa(CPUState
*cs
)
705 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
706 CPUPPCState
*env
= &cpu
->env
;
707 struct kvm_one_reg reg
;
710 /* SLB shadow or DTL can't be registered unless a master VPA is
711 * registered. That means when restoring state, if a VPA *is*
712 * registered, we need to set that up first. If not, we need to
713 * deregister the others before deregistering the master VPA */
714 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
717 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
718 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
719 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
721 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
726 assert((uintptr_t)&env
->slb_shadow_size
727 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
728 reg
.id
= KVM_REG_PPC_VPA_SLB
;
729 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
730 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
732 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
736 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
737 reg
.id
= KVM_REG_PPC_VPA_DTL
;
738 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
739 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
741 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
746 if (!env
->vpa_addr
) {
747 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
748 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
749 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
751 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
758 #endif /* TARGET_PPC64 */
760 int kvm_arch_put_registers(CPUState
*cs
, int level
)
762 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
763 CPUPPCState
*env
= &cpu
->env
;
764 struct kvm_regs regs
;
768 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
775 regs
.xer
= cpu_read_xer(env
);
779 regs
.srr0
= env
->spr
[SPR_SRR0
];
780 regs
.srr1
= env
->spr
[SPR_SRR1
];
782 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
783 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
784 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
785 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
786 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
787 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
788 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
789 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
791 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
793 for (i
= 0;i
< 32; i
++)
794 regs
.gpr
[i
] = env
->gpr
[i
];
797 for (i
= 0; i
< 8; i
++) {
798 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
801 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
807 if (env
->tlb_dirty
) {
809 env
->tlb_dirty
= false;
812 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
813 struct kvm_sregs sregs
;
815 sregs
.pvr
= env
->spr
[SPR_PVR
];
817 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
821 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
822 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
823 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
824 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
826 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
831 for (i
= 0; i
< 16; i
++) {
832 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
836 for (i
= 0; i
< 8; i
++) {
837 /* Beware. We have to swap upper and lower bits here */
838 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
840 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
844 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
850 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
851 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
857 /* We deliberately ignore errors here, for kernels which have
858 * the ONE_REG calls, but don't support the specific
859 * registers, there's a reasonable chance things will still
860 * work, at least until we try to migrate. */
861 for (i
= 0; i
< 1024; i
++) {
862 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
865 kvm_put_one_spr(cs
, id
, i
);
871 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
872 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
874 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
875 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
877 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
878 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
879 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
880 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
881 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
882 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
883 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
884 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
885 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
886 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
890 if (kvm_put_vpa(cs
) < 0) {
891 DPRINTF("Warning: Unable to set VPA information to KVM\n");
895 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
896 #endif /* TARGET_PPC64 */
902 int kvm_arch_get_registers(CPUState
*cs
)
904 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
905 CPUPPCState
*env
= &cpu
->env
;
906 struct kvm_regs regs
;
907 struct kvm_sregs sregs
;
911 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
916 for (i
= 7; i
>= 0; i
--) {
917 env
->crf
[i
] = cr
& 15;
923 cpu_write_xer(env
, regs
.xer
);
927 env
->spr
[SPR_SRR0
] = regs
.srr0
;
928 env
->spr
[SPR_SRR1
] = regs
.srr1
;
930 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
931 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
932 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
933 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
934 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
935 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
936 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
937 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
939 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
941 for (i
= 0;i
< 32; i
++)
942 env
->gpr
[i
] = regs
.gpr
[i
];
946 if (cap_booke_sregs
) {
947 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
952 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
953 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
954 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
955 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
956 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
957 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
958 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
959 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
960 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
961 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
962 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
963 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
966 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
967 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
968 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
969 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
970 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
971 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
974 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
975 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
978 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
979 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
982 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
983 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
984 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
985 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
986 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
987 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
988 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
989 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
990 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
991 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
992 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
993 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
994 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
995 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
996 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
997 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
998 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1000 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1001 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1002 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1003 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1006 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1007 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1010 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1011 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1012 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1016 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1017 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1018 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1019 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1020 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1021 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1022 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1023 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1024 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1025 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1026 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1029 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1030 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1033 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1034 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1035 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1038 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1039 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1040 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1041 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1043 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1044 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1045 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1051 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1056 if (!env
->external_htab
) {
1057 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1063 * The packed SLB array we get from KVM_GET_SREGS only contains
1064 * information about valid entries. So we flush our internal
1065 * copy to get rid of stale ones, then put all valid SLB entries
1068 memset(env
->slb
, 0, sizeof(env
->slb
));
1069 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1070 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1071 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1073 * Only restore valid entries
1075 if (rb
& SLB_ESID_V
) {
1076 ppc_store_slb(env
, rb
, rs
);
1082 for (i
= 0; i
< 16; i
++) {
1083 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1087 for (i
= 0; i
< 8; i
++) {
1088 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1089 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1090 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1091 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1096 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1102 /* We deliberately ignore errors here, for kernels which have
1103 * the ONE_REG calls, but don't support the specific
1104 * registers, there's a reasonable chance things will still
1105 * work, at least until we try to migrate. */
1106 for (i
= 0; i
< 1024; i
++) {
1107 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1110 kvm_get_one_spr(cs
, id
, i
);
1116 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1117 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1119 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1120 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1122 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1123 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1124 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1125 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1126 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1127 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1128 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1129 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1130 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1131 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1135 if (kvm_get_vpa(cs
) < 0) {
1136 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1140 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1147 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1149 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1151 if (irq
!= PPC_INTERRUPT_EXT
) {
1155 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1159 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1164 #if defined(TARGET_PPCEMB)
1165 #define PPC_INPUT_INT PPC40x_INPUT_INT
1166 #elif defined(TARGET_PPC64)
1167 #define PPC_INPUT_INT PPC970_INPUT_INT
1169 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1172 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1174 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1175 CPUPPCState
*env
= &cpu
->env
;
1179 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1180 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1181 if (!cap_interrupt_level
&&
1182 run
->ready_for_interrupt_injection
&&
1183 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1184 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1186 /* For now KVM disregards the 'irq' argument. However, in the
1187 * future KVM could cache it in-kernel to avoid a heavyweight exit
1188 * when reading the UIC.
1190 irq
= KVM_INTERRUPT_SET
;
1192 DPRINTF("injected interrupt %d\n", irq
);
1193 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1195 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1198 /* Always wake up soon in case the interrupt was level based */
1199 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1200 (get_ticks_per_sec() / 50));
1203 /* We don't know if there are more interrupts pending after this. However,
1204 * the guest will return to userspace in the course of handling this one
1205 * anyways, so we will get a chance to deliver the rest. */
1208 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1212 int kvm_arch_process_async_events(CPUState
*cs
)
1217 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1219 CPUState
*cs
= CPU(cpu
);
1220 CPUPPCState
*env
= &cpu
->env
;
1222 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1224 cs
->exception_index
= EXCP_HLT
;
1230 /* map dcr access to existing qemu dcr emulation */
1231 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1233 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1234 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1239 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1241 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1242 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
/*
 * Main KVM exit dispatcher for PPC: decode run->exit_reason and emulate the
 * operation the guest attempted (DCR access, halt, PAPR hypercall, EPR read,
 * watchdog expiry).
 * NOTE(review): the extract elides the case labels for DCR/HLT, the break
 * statements, the default's ret value and the final return.
 */
1247 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1249 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1250 CPUPPCState
*env
= &cpu
->env
;
1253 switch (run
->exit_reason
) {
/* DCR exit: kernel passes the DCR number and data in run->dcr. */
1255 if (run
->dcr
.is_write
) {
1256 DPRINTF("handle dcr write\n");
1257 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1259 DPRINTF("handle dcr read\n");
1260 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
/* Guest executed a halting instruction. */
1264 DPRINTF("handle halt\n");
1265 ret
= kvmppc_handle_halt(cpu
);
/* sPAPR hypercalls only exist on 64-bit PPC. */
1267 #if defined(TARGET_PPC64)
1268 case KVM_EXIT_PAPR_HCALL
:
1269 DPRINTF("handle PAPR hypercall\n");
/* Result goes straight back to the kernel via the shared run page. */
1270 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1272 run
->papr_hcall
.args
);
/* EPR: fetch the interrupt vector by reading the MPIC IACK address. */
1277 DPRINTF("handle epr\n");
1278 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1281 case KVM_EXIT_WATCHDOG
:
1282 DPRINTF("handle watchdog expiry\n");
1283 watchdog_perform_action();
/* Unknown exit reason: report and (elided) fail the exit. */
1288 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1296 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1298 CPUState
*cs
= CPU(cpu
);
1299 uint32_t bits
= tsr_bits
;
1300 struct kvm_one_reg reg
= {
1301 .id
= KVM_REG_PPC_OR_TSR
,
1302 .addr
= (uintptr_t) &bits
,
1305 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1308 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1311 CPUState
*cs
= CPU(cpu
);
1312 uint32_t bits
= tsr_bits
;
1313 struct kvm_one_reg reg
= {
1314 .id
= KVM_REG_PPC_CLEAR_TSR
,
1315 .addr
= (uintptr_t) &bits
,
1318 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1321 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1323 CPUState
*cs
= CPU(cpu
);
1324 CPUPPCState
*env
= &cpu
->env
;
1325 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1327 struct kvm_one_reg reg
= {
1328 .id
= KVM_REG_PPC_TCR
,
1329 .addr
= (uintptr_t) &tcr
,
1332 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
/*
 * Enable the in-kernel BookE watchdog for this vCPU via
 * KVM_CAP_PPC_BOOKE_WATCHDOG.
 * NOTE(review): the extract elides the guards' return values and the final
 * return; only the visible flow is documented.
 */
1335 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1337 CPUState
*cs
= CPU(cpu
);
/* Nothing to do (elided early return) when not running under KVM. */
1340 if (!kvm_enabled()) {
1344 if (!cap_ppc_watchdog
) {
/* NOTE(review): warning lacks a trailing newline -- verify intent. */
1345 printf("warning: KVM does not support watchdog");
1349 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1351 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1352 __func__
, strerror(-ret
));
/*
 * Scan /proc/cpuinfo for a line starting with @field and copy that whole line
 * into @value (at most @len bytes, via pstrcpy).
 * NOTE(review): the extract elides the read loop structure, fclose and the
 * return values.
 */
1359 static int read_cpuinfo(const char *field
, char *value
, int len
)
1363 int field_len
= strlen(field
);
1366 f
= fopen("/proc/cpuinfo", "r");
/* EOF / read error terminates the (elided) scan loop. */
1372 if(!fgets(line
, sizeof(line
), f
)) {
/* Prefix match against the requested field name. */
1375 if (!strncmp(line
, field
, field_len
)) {
1376 pstrcpy(value
, len
, line
);
/*
 * Host timebase frequency: parse the "timebase" line of /proc/cpuinfo,
 * falling back to get_ticks_per_sec() when it cannot be read/parsed.
 */
1387 uint32_t kvmppc_get_tbfreq(void)
1391 uint32_t retval
= get_ticks_per_sec();
1393 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
/* Value follows the ':' separator on the cpuinfo line. */
1397 if (!(ns
= strchr(line
, ':'))) {
1407 /* Try to find a device tree node for a CPU with clock-frequency property */
/*
 * Walk /proc/device-tree/cpus/ and leave in @buf the path of the first CPU
 * node that has a clock-frequency property.
 * NOTE(review): the extract elides buf initialization, fclose/closedir and
 * the return values.
 */
1408 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1410 struct dirent
*dirp
;
1413 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1414 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1419 while ((dirp
= readdir(dp
)) != NULL
) {
/* Probe for <node>/clock-frequency; a successful fopen marks a CPU node. */
1421 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1423 f
= fopen(buf
, "r");
/* Keep the bare node path in buf for the caller. */
1425 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
/* Empty buf means no suitable CPU node was found. */
1432 if (buf
[0] == '\0') {
1433 printf("Unknown host!\n");
1440 /* Read a CPU node property from the host device tree that's a single
1441 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1442 * (can't find or open the property, or doesn't understand the
/* NOTE(review): the extract elides the local declarations, fopen error
 * handling, fclose and the length switch/fall-through structure. */
1444 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1454 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
/* NOTE(review): strncat's size argument should normally be
 * sizeof(buf) - strlen(buf) - 1, since strncat appends a NUL beyond the
 * limit -- this looks like an off-by-one; confirm against the full file. */
1458 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
1459 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
1461 f
= fopen(buf
, "rb");
/* Device-tree properties are raw big-endian integers. */
1466 len
= fread(&u
, 1, sizeof(u
), f
);
1470 /* property is a 32-bit quantity */
1471 return be32_to_cpu(u
.v32
);
1473 return be64_to_cpu(u
.v64
);
/*
 * Thin accessors for single-integer properties of the host CPU's
 * device-tree node. Each returns 0 if the property cannot be read
 * (see kvmppc_read_int_cpu_dt).
 */

/* Host CPU core clock frequency in Hz. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

/* "ibm,vmx" property: VMX/VSX capability level of the host CPU. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

/* "ibm,dfp" property: decimal floating point capability of the host CPU. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
/*
 * Fetch the paravirt info block (hypercall sequence + flags) from KVM.
 * NOTE(review): the extract elides the success/failure return statements.
 */
1494 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1496 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1497 CPUState
*cs
= CPU(cpu
);
/* Only meaningful when the kernel advertises KVM_CAP_PPC_GET_PVINFO. */
1499 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1500 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
/*
 * Whether the hypervisor supports the EV_IDLE paravirt idle hcall.
 * NOTE(review): returns elided in this extract.
 */
1507 int kvmppc_get_hasidle(CPUPPCState
*env
)
1509 struct kvm_ppc_pvinfo pvinfo
;
1511 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1512 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
/*
 * Copy the KVM-provided hypercall instruction sequence into @buf; if KVM has
 * none, synthesize a 4-instruction always-fail sequence that returns -1 in r3
 * regardless of guest endianness.
 * NOTE(review): the extract elides the return statements.
 */
1519 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1521 uint32_t *hc
= (uint32_t*)buf
;
1522 struct kvm_ppc_pvinfo pvinfo
;
/* Preferred path: the kernel supplies the real hypercall sequence. */
1524 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1525 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1530 * Fallback to always fail hypercalls regardless of endianness:
1532 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1534 * b .+8 (becomes nop in wrong endian)
1535 * bswap32(li r3, -1)
/* Encodings chosen so each endianness skips the other's "li r3,-1". */
1538 hc
[0] = cpu_to_be32(0x08000048);
1539 hc
[1] = cpu_to_be32(0x3860ffff);
1540 hc
[2] = cpu_to_be32(0x48000008);
1541 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
/*
 * Put the vCPU into PAPR (sPAPR guest) mode via KVM_CAP_PPC_PAPR.
 * Aborts the CPU if the kernel does not support PAPR guests.
 * NOTE(review): the extract elides the ret declaration, the error check and
 * the capability-flag update the trailing comment refers to.
 */
1546 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1548 CPUState
*cs
= CPU(cpu
);
1551 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
/* Fatal: a PAPR machine cannot run without kernel PAPR support. */
1553 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1556 /* Update the capability flag so we sync the right information
1561 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
1563 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
/*
 * Enable/disable MPIC proxy (EPR) mode for this vCPU via KVM_CAP_PPC_EPR.
 * NOTE(review): the extract elides the ret declaration and closing braces.
 */
1566 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1568 CPUState
*cs
= CPU(cpu
);
1571 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
/* Only fatal when proxy mode was actually requested; disabling may fail
 * harmlessly. */
1572 if (ret
&& mpic_proxy
) {
1573 cpu_abort(cs
, "This KVM version does not support EPR\n");
1577 int kvmppc_smt_threads(void)
1579 return cap_ppc_smt
? cap_ppc_smt
: 1;
/*
 * Allocate a contiguous Real Mode Area through KVM_ALLOCATE_RMA, mmap it and
 * register it as RAM at guest physical 0 in @sysmem.
 * NOTE(review): the extract elides the fd/size/rma declarations, the early
 * returns and the final return of the RMA size.
 */
1583 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1588 struct kvm_allocate_rma ret
;
1589 MemoryRegion
*rma_region
;
1591 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1592 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1593 * not necessary on this hardware
1594 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1596 * FIXME: We should allow the user to force contiguous RMA
1597 * allocation in the cap_ppc_rma==1 case.
/* Only hardware that *requires* a contiguous RMA goes through this path. */
1599 if (cap_ppc_rma
< 2) {
1603 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1605 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
/* Cap the mapped RMA at 256 MiB. */
1610 size
= MIN(ret
.rma_size
, 256ul << 20);
1612 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1613 if (rma
== MAP_FAILED
) {
1614 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
/* Wrap the mapping in a MemoryRegion and install it at guest address 0. */
1618 rma_region
= g_new(MemoryRegion
, 1);
1619 memory_region_init_ram_ptr(rma_region
, NULL
, name
, size
, rma
);
1620 vmstate_register_ram_global(rma_region
);
1621 memory_region_add_subregion(sysmem
, 0, rma_region
);
/*
 * Clamp the requested RMA size to what the hash table geometry allows:
 * at most 1 << (best_page_shift + hash_shift - 7) bytes, where
 * best_page_shift is the largest host-supported page size not exceeding the
 * backing RAM page size.
 * NOTE(review): the extract elides the loop index declaration, the continue
 * in the empty-entry case and closing braces.
 */
1626 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1628 struct kvm_ppc_smmu_info info
;
1629 long rampagesize
, best_page_shift
;
/* With a kernel-allocated contiguous RMA, the size is already final. */
1632 if (cap_ppc_rma
>= 2) {
1633 return current_size
;
1636 /* Find the largest hardware supported page size that's less than
1637 * or equal to the (logical) backing page size of guest RAM */
1638 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1639 rampagesize
= getrampagesize();
1640 best_page_shift
= 0;
1642 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1643 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
/* A zero page_shift marks an unused table slot. */
1645 if (!sps
->page_shift
) {
1649 if ((sps
->page_shift
> best_page_shift
)
1650 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1651 best_page_shift
= sps
->page_shift
;
1655 return MIN(current_size
,
1656 1ULL << (best_page_shift
+ hash_shift
- 7));
1660 bool kvmppc_spapr_use_multitce(void)
1662 return cap_spapr_multitce
;
/*
 * Create an in-kernel sPAPR TCE (IOMMU) table for @liobn and mmap it into
 * QEMU; on success *pfd holds the table fd and the mapping is returned.
 * NOTE(review): the extract elides the fd/len/table declarations, most error
 * returns and the final return of the mapping.
 */
1665 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
1668 struct kvm_create_spapr_tce args
= {
1670 .window_size
= window_size
,
1676 /* Must set fd to -1 so we don't try to munmap when called for
1677 * destroying the table, which the upper layers -will- do
/* Bail out when the kernel lacks TCE support, or lacks the VFIO-capable
 * variant when acceleration for VFIO was requested. */
1680 if (!cap_spapr_tce
|| (vfio_accel
&& !cap_spapr_vfio
)) {
1684 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1686 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
/* One 64-bit TCE entry per IOMMU page in the window. */
1691 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
1692 /* FIXME: round this up to page size */
1694 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1695 if (table
== MAP_FAILED
) {
1696 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
/*
 * Undo kvmppc_create_spapr_tce: unmap the table mapping.
 * NOTE(review): the early return for table == NULL, close(fd) and the
 * return values are elided in this extract.
 */
1706 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
1714 len
= nb_table
* sizeof(uint64_t);
1715 if ((munmap(table
, len
) < 0) ||
1717 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1719 /* Leak the table */
1725 int kvmppc_reset_htab(int shift_hint
)
1727 uint32_t shift
= shift_hint
;
1729 if (!kvm_enabled()) {
1730 /* Full emulation, tell caller to allocate htab itself */
1733 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1735 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1736 if (ret
== -ENOTTY
) {
1737 /* At least some versions of PR KVM advertise the
1738 * capability, but don't implement the ioctl(). Oops.
1739 * Return 0 so that we allocate the htab in qemu, as is
1740 * correct for PR. */
1742 } else if (ret
< 0) {
1748 /* We have a kernel that predates the htab reset calls. For PR
1749 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1750 * this era, it has allocated a 16MB fixed size hash table
1751 * already. Kernels of this era have the GET_PVINFO capability
1752 * only on PR, so we use this hack to determine the right
1754 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1755 /* PR - tell caller to allocate htab */
1758 /* HV - assume 16MB kernel allocated htab */
/* Read the host Processor Version Register; body not visible in this
 * extract (presumably inline asm "mfpvr" -- TODO confirm). */
1763 static inline uint32_t mfpvr(void)
/* Set or clear @flags in *word depending on @on; body not visible in this
 * extract. */
1772 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
1781 static void kvmppc_host_cpu_initfn(Object
*obj
)
1783 assert(kvm_enabled());
/*
 * Class init for the "host" CPU type: override the model's instruction
 * flags and cache sizes with values probed from the host device tree.
 * NOTE(review): the extract elides the PVR assignment and the surrounding
 * conditional structure for the vmx/dfp overrides.
 */
1786 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1788 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1789 uint32_t vmx
= kvmppc_get_vmx();
1790 uint32_t dfp
= kvmppc_get_dfp();
1791 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1792 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1794 /* Now fix up the class with information we can query from the host */
1798 /* Only override when we know what the host supports */
/* vmx > 0 means Altivec; vmx > 1 additionally means VSX. */
1799 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1800 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1803 /* Only override when we know what the host supports */
1804 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
/* -1 (i.e. read failure wrapped to unsigned) leaves the model default. */
1807 if (dcache_size
!= -1) {
1808 pcc
->l1_dcache_size
= dcache_size
;
1811 if (icache_size
!= -1) {
1812 pcc
->l1_icache_size
= icache_size
;
/* Capability accessors for flags probed at kvm_arch_init time. */
/* EPR (MPIC proxy) support; body not visible in this extract. */
1816 bool kvmppc_has_cap_epr(void)
/* HTAB fd support; body not visible in this extract. */
1821 bool kvmppc_has_cap_htab_fd(void)
/* Whether the kernel can fix up hypercall instructions for the guest. */
1826 bool kvmppc_has_cap_fixup_hcalls(void)
1828 return cap_fixup_hcalls
;
/*
 * Walk up the QOM class hierarchy from @pcc until the first abstract class,
 * i.e. the CPU "family" class this model belongs to.
 * NOTE(review): braces and any elided assertion between the loop and the
 * return are not visible in this extract.
 */
1831 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
1833 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
1835 while (oc
&& !object_class_is_abstract(oc
)) {
1836 oc
= object_class_get_parent(oc
);
1840 return POWERPC_CPU_CLASS(oc
);
/*
 * Register the "host" CPU QOM type: resolve the host PVR to an existing CPU
 * class, make "host" an alias subclass of it, and also register a generic
 * family-named type for the same class.
 * NOTE(review): the extract elides the dc declaration, the failure return
 * and the final return.
 */
1843 static int kvm_ppc_register_host_cpu_type(void)
1845 TypeInfo type_info
= {
1846 .name
= TYPE_HOST_POWERPC_CPU
,
1847 .instance_init
= kvmppc_host_cpu_initfn
,
1848 .class_init
= kvmppc_host_cpu_class_init
,
1850 uint32_t host_pvr
= mfpvr();
1851 PowerPCCPUClass
*pvr_pcc
;
/* Exact PVR match first, then a masked match as fallback. */
1854 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1855 if (pvr_pcc
== NULL
) {
1856 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
1858 if (pvr_pcc
== NULL
) {
1861 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1862 type_register(&type_info
);
1864 /* Register generic family CPU class for a family */
1865 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
1866 dc
= DEVICE_CLASS(pvr_pcc
);
1867 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
/* Family type is named "<desc>-powerpc-cpu" after the class description. */
1868 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
1869 type_register(&type_info
);
/*
 * Bind an RTAS call name to a token handled in-kernel
 * (KVM_PPC_RTAS_DEFINE_TOKEN).
 * NOTE(review): the struct initializer's .token assignment and the
 * unsupported-capability return are elided in this extract.
 */
1874 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
1876 struct kvm_rtas_token_args args
= {
1880 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
/* NOTE(review): strncpy may leave args.name without a NUL terminator when
 * strlen(function) >= sizeof(args.name) -- confirm the kernel ABI tolerates
 * this, or bound-copy with explicit termination. */
1884 strncpy(args
.name
, function
, sizeof(args
.name
));
1886 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
1889 int kvmppc_get_htab_fd(bool write
)
1891 struct kvm_get_htab_fd s
= {
1892 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
1897 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
1901 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
/*
 * Stream hash-table chunks from the kernel htab @fd into migration stream
 * @f, stopping after @max_ns of wall-clock time or at end of data.
 * Returns 1 when the table is fully read, 0 when more remains (final return
 * visible below); error paths are elided in this extract.
 */
1904 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
1906 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
/* VLA scratch buffer; bufsize is chosen by the migration caller. */
1907 uint8_t buf
[bufsize
];
1911 rc
= read(fd
, buf
, bufsize
);
1913 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
1917 /* Kernel already returns data in BE format for the file */
1918 qemu_put_buffer(f
, buf
, rc
);
/* Loop (structure elided) continues while data flows and the time budget
 * has not been exhausted. */
1922 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
1924 return (rc
== 0) ? 1 : 0;
/*
 * Feed one incoming migration chunk of hash table entries back to the
 * kernel: build a kvm_get_htab_header + @n_valid HPTEs and write it to @fd.
 * NOTE(review): the header .index assignment, the rc declaration and the
 * return statements are elided in this extract.
 */
1927 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
1928 uint16_t n_valid
, uint16_t n_invalid
)
1930 struct kvm_get_htab_header
*buf
;
1931 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
/* alloca: chunk lives on the stack for the duration of this call only. */
1934 buf
= alloca(chunksize
);
1935 /* This is KVM on ppc, so this is all big-endian */
1937 buf
->n_valid
= n_valid
;
1938 buf
->n_invalid
= n_invalid
;
/* HPTE payload from the stream goes straight after the header. */
1940 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
1942 rc
= write(fd
, buf
, chunksize
);
1944 fprintf(stderr
, "Error writing KVM hash table: %s\n",
1948 if (rc
!= chunksize
) {
1949 /* We should never get a short write on a single chunk */
1950 fprintf(stderr
, "Short write, restoring KVM hash table\n");
/*
 * Generic kvm_arch_* hooks. Bodies are not visible in this extract
 * (presumably trivial stubs on PPC -- TODO confirm against the full file).
 */
1956 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
1961 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
1966 int kvm_arch_on_sigbus(int code
, void *addr
)
1971 void kvm_arch_init_irq_routing(KVMState
*s
)
/* Software/hardware breakpoint hooks; PPC bodies not visible here. */
1975 int kvm_arch_insert_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1980 int kvm_arch_remove_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1985 int kvm_arch_insert_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1990 int kvm_arch_remove_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1995 void kvm_arch_remove_all_hw_breakpoints(void)
1999 void kvm_arch_update_guest_debug(CPUState
*cpu
, struct kvm_guest_debug
*dbg
)
/*
 * Buffer layout used when reading one HPTE group through the htab fd:
 * a kvm_get_htab_header immediately followed by the HPTE words.
 */
2003 struct kvm_get_htab_buf
{
2004 struct kvm_get_htab_header header
;
2006 * We require one extra byte for read
/* 2 target_ulong words per PTE, hence HPTES_PER_GROUP * 2, plus the extra
 * slot mentioned above. */
2008 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
/*
 * Read the HPTE group starting at @pte_index from the kernel and return an
 * opaque token (the address of the heap-allocated hpte array) to the caller;
 * released later via kvmppc_hash64_free_pteg.
 * NOTE(review): the htab_fd declaration, error paths and fd close are
 * elided in this extract.
 */
2011 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2014 struct kvm_get_htab_fd ghf
;
2015 struct kvm_get_htab_buf
*hpte_buf
;
2018 ghf
.start_index
= pte_index
;
2019 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
/* Zeroed so the header fields start in a defined state. */
2024 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2026 * Read the hpte group
2028 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
/* Token = address of the embedded hpte array (ownership passes to caller). */
2033 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
/*
 * Free a token returned by kvmppc_hash64_read_pteg: recover the containing
 * kvm_get_htab_buf and release it. The container_of member argument and the
 * g_free call are cut off in this extract.
 */
2042 void kvmppc_hash64_free_pteg(uint64_t token
)
2044 struct kvm_get_htab_buf
*htab_buf
;
2046 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2052 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2053 target_ulong pte0
, target_ulong pte1
)
2056 struct kvm_get_htab_fd ghf
;
2057 struct kvm_get_htab_buf hpte_buf
;
2060 ghf
.start_index
= 0; /* Ignored */
2061 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2066 hpte_buf
.header
.n_valid
= 1;
2067 hpte_buf
.header
.n_invalid
= 0;
2068 hpte_buf
.header
.index
= pte_index
;
2069 hpte_buf
.hpte
[0] = pte0
;
2070 hpte_buf
.hpte
[1] = pte1
;
2072 * Write the hpte entry.
2073 * CAUTION: write() has the warn_unused_result attribute. Hence we
2074 * need to check the return value, even though we do nothing.
2076 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {