/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include <sys/types.h>
#include <sys/ioctl.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "sysemu/watchdog.h"
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_one_reg;
static int cap_hior;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);
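/* Probe the KVM capabilities we care about once at start-up; the cap_*
 * flags cached above are consulted throughout the rest of this file. */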
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
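/* Push the PVR of the CPU QEMU is modelling into KVM via KVM_SET_SREGS,
 * so the vcpu identifies itself as that processor version. */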
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else if (!cap_segstate) {
        fprintf(stderr, "kvm error: missing PVR setting capability\n");
        return -ENOSYS;
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
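/* Ask the kernel for its real MMU geometry where possible; only fall back
 * to the conservative guesses above on kernels without the ioctl. */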
static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}
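/* Work out the page size actually backing guest RAM: the normal host page
 * size for anonymous memory, or the hugepage size when -mem-path points at
 * a hugetlbfs mount. */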
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
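/* Reconcile the segment/page sizes the host MMU reports (and the size of
 * the pages backing guest RAM) with the sps table the CPU advertises. */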
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int ik, iq, jk, jq;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}
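/* Per-vcpu setup: fix up page sizes, sync sregs, arm the periodic wakeup
 * timer and, on BookE 2.06, share the guest TLB array with the kernel. */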
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}
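/* Hand QEMU's copy of the guest TLB back to the kernel by marking every
 * entry dirty and issuing KVM_DIRTY_TLB on the shared array. */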
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
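/* Helpers that move a single SPR between env->spr[] and KVM through the
 * ONE_REG interface, honouring the 32- or 64-bit width encoded in the id. */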
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}
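/* Transfer floating point, VSX and Altivec state one register at a time via
 * ONE_REG; FPRs and VSRs share the same 128-bit register slots. */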
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
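/* Push QEMU's cached register state (GPRs, CR, SPRs, segment registers,
 * SLB, BATs and the PAPR VPAs) back into the kernel before the vcpu runs. */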
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.xer = cpu_read_xer(env);

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            if (env->slb[i].esid & SLB_ESID_V) {
                sregs.u.s.ppc64.slb[i].slbe |= i;
            }
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }

        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }
#endif /* TARGET_PPC64 */
    }

    return ret;
}
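/* Mirror image of kvm_arch_put_registers(): pull the vcpu state out of the
 * kernel and scatter it back into env, covering both the BookE and the
 * Book3S sregs layouts. */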
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    cpu_write_xer(env, regs.xer);

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (!env->external_htab) {
            ppc_store_sdr1(env, sregs.u.s.sdr1);
        }

        /*
         * The packed SLB array we get from KVM_GET_SREGS only contains
         * information about valid entries. So we flush our internal
         * copy to get rid of stale ones, then put all valid SLB entries
         * back in.
         */
        memset(env->slb, 0, sizeof(env->slb));
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
            target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
            /*
             * Only restore valid entries
             */
            if (rb & SLB_ESID_V) {
                ppc_store_slb(env, rb, rs);
            }
        }

        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }
    }

    return 0;
}
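/* Raise or lower the external interrupt pin of a vcpu; with the IRQ_LEVEL
 * and UNSET_IRQ capabilities this maps directly onto KVM_INTERRUPT. */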
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    unsigned irq;
    int r;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}
/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
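/* Dispatch the exit reasons KVM hands back to userspace: DCR accesses,
 * halt, sPAPR hypercalls, EPR reads and watchdog expiry. */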
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
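/* BookE watchdog plumbing: set or clear TSR bits, mirror TCR into the
 * kernel, and enable the in-kernel watchdog when the machine requests it. */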
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}
int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap encap = {};
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}
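/* Host introspection helpers: read the timebase frequency from
 * /proc/cpuinfo and individual CPU node properties (clock-frequency,
 * ibm,vmx, ibm,dfp, cache sizes) from the host device tree. */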
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;

        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
    }
    closedir(dp);

    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls (li r3, -1; nop; nop; nop):
     */
    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}
int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
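/* In-kernel TCE (IOMMU) table management for sPAPR: create a table through
 * KVM and mmap it into QEMU, and unmap/close it again on teardown. */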
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;

        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}
int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
        + (cs->cpu_index % smp_threads);

    return 0;
}
bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    return 0;
}
int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
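/* HPT migration support: obtain a hash-table fd from KVM and stream the
 * guest hash page table in the kernel's (big-endian) chunk format. */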
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Kernel already returns data in BE format for the file */
            qemu_put_buffer(f, buf, rc);
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    /* This is KVM on ppc, so this is all big-endian */
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
}

void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
}
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};

uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        return 0;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        g_free(hpte_buf);
        close(htab_fd);
        return 0;
    }

    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;
}

void kvmppc_hash64_free_pteg(uint64_t token)
{
    struct kvm_get_htab_buf *htab_buf;

    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
                            hpte);
    g_free(htab_buf);
}