2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
/* Debug build: DPRINTF() forwards to stderr. */
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Release build: compiled out, but stays a single valid statement so it is
 * safe in unbraced if/else bodies. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
58 static int cap_interrupt_unset
= false;
59 static int cap_interrupt_level
= false;
60 static int cap_segstate
;
61 static int cap_booke_sregs
;
62 static int cap_ppc_smt
;
63 static int cap_ppc_rma
;
64 static int cap_spapr_tce
;
65 static int cap_spapr_multitce
;
67 static int cap_one_reg
;
69 static int cap_ppc_watchdog
;
71 static int cap_htab_fd
;
73 /* XXX We have a race condition where we actually have a level triggered
74 * interrupt, but the infrastructure can't expose that yet, so the guest
75 * takes but ignores it, goes to sleep and never gets notified that there's
76 * still an interrupt pending.
78 * As a quick workaround, let's just wake up again 20 ms after we injected
79 * an interrupt. That way we can assure that we're always reinjecting
80 * interrupts in case the guest swallowed them.
82 static QEMUTimer
*idle_timer
;
84 static void kvm_kick_cpu(void *opaque
)
86 PowerPCCPU
*cpu
= opaque
;
88 qemu_cpu_kick(CPU(cpu
));
91 static int kvm_ppc_register_host_cpu_type(void);
93 int kvm_arch_init(KVMState
*s
)
95 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
96 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
97 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
98 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
99 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
100 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
101 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
102 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
103 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
104 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
105 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
106 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
107 /* Note: we don't set cap_papr here, because this capability is
108 * only activated after this by kvmppc_set_papr() */
109 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
111 if (!cap_interrupt_level
) {
112 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
113 "VM to stall at times!\n");
116 kvm_ppc_register_host_cpu_type();
121 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
123 CPUPPCState
*cenv
= &cpu
->env
;
124 CPUState
*cs
= CPU(cpu
);
125 struct kvm_sregs sregs
;
128 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
129 /* What we're really trying to say is "if we're on BookE, we use
130 the native PVR for now". This is the only sane way to check
131 it though, so we potentially confuse users that they can run
132 BookE guests on BookS. Let's hope nobody dares enough :) */
136 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
141 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
146 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
147 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
150 /* Set up a shared TLB array with KVM */
151 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
153 CPUPPCState
*env
= &cpu
->env
;
154 CPUState
*cs
= CPU(cpu
);
155 struct kvm_book3e_206_tlb_params params
= {};
156 struct kvm_config_tlb cfg
= {};
157 unsigned int entries
= 0;
160 if (!kvm_enabled() ||
161 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
165 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
167 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
168 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
169 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
170 entries
+= params
.tlb_sizes
[i
];
173 assert(entries
== env
->nb_tlb
);
174 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
176 env
->tlb_dirty
= true;
178 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
179 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
180 cfg
.params
= (uintptr_t)¶ms
;
181 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
183 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
185 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
186 __func__
, strerror(-ret
));
190 env
->kvm_sw_tlb
= true;
195 #if defined(TARGET_PPC64)
196 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
197 struct kvm_ppc_smmu_info
*info
)
199 CPUPPCState
*env
= &cpu
->env
;
200 CPUState
*cs
= CPU(cpu
);
202 memset(info
, 0, sizeof(*info
));
204 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
205 * need to "guess" what the supported page sizes are.
207 * For that to work we make a few assumptions:
209 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
210 * KVM which only supports 4K and 16M pages, but supports them
211 * regardless of the backing store characteritics. We also don't
212 * support 1T segments.
214 * This is safe as if HV KVM ever supports that capability or PR
215 * KVM grows supports for more page/segment sizes, those versions
216 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
217 * will not hit this fallback
219 * - Else we are running HV KVM. This means we only support page
220 * sizes that fit in the backing store. Additionally we only
221 * advertize 64K pages if the processor is ARCH 2.06 and we assume
222 * P7 encodings for the SLB and hash table. Here too, we assume
223 * support for any newer processor will mean a kernel that
224 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
227 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
232 /* Standard 4k base page size segment */
233 info
->sps
[0].page_shift
= 12;
234 info
->sps
[0].slb_enc
= 0;
235 info
->sps
[0].enc
[0].page_shift
= 12;
236 info
->sps
[0].enc
[0].pte_enc
= 0;
238 /* Standard 16M large page size segment */
239 info
->sps
[1].page_shift
= 24;
240 info
->sps
[1].slb_enc
= SLB_VSID_L
;
241 info
->sps
[1].enc
[0].page_shift
= 24;
242 info
->sps
[1].enc
[0].pte_enc
= 0;
246 /* HV KVM has backing store size restrictions */
247 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
249 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
250 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
253 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
259 /* Standard 4k base page size segment */
260 info
->sps
[i
].page_shift
= 12;
261 info
->sps
[i
].slb_enc
= 0;
262 info
->sps
[i
].enc
[0].page_shift
= 12;
263 info
->sps
[i
].enc
[0].pte_enc
= 0;
266 /* 64K on MMU 2.06 */
267 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
268 info
->sps
[i
].page_shift
= 16;
269 info
->sps
[i
].slb_enc
= 0x110;
270 info
->sps
[i
].enc
[0].page_shift
= 16;
271 info
->sps
[i
].enc
[0].pte_enc
= 1;
275 /* Standard 16M large page size segment */
276 info
->sps
[i
].page_shift
= 24;
277 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
278 info
->sps
[i
].enc
[0].page_shift
= 24;
279 info
->sps
[i
].enc
[0].pte_enc
= 0;
283 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
285 CPUState
*cs
= CPU(cpu
);
288 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
289 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
295 kvm_get_fallback_smmu_info(cpu
, info
);
298 static long getrampagesize(void)
304 /* guest RAM is backed by normal anonymous pages */
305 return getpagesize();
309 ret
= statfs(mem_path
, &fs
);
310 } while (ret
!= 0 && errno
== EINTR
);
313 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
318 #define HUGETLBFS_MAGIC 0x958458f6
320 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
321 /* Explicit mempath, but it's ordinary pages */
322 return getpagesize();
325 /* It's hugepage, return the huge page size */
329 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
331 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
335 return (1ul << shift
) <= rampgsize
;
338 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
340 static struct kvm_ppc_smmu_info smmu_info
;
341 static bool has_smmu_info
;
342 CPUPPCState
*env
= &cpu
->env
;
346 /* We only handle page sizes for 64-bit server guests for now */
347 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
351 /* Collect MMU info from kernel if not already */
352 if (!has_smmu_info
) {
353 kvm_get_smmu_info(cpu
, &smmu_info
);
354 has_smmu_info
= true;
357 rampagesize
= getrampagesize();
359 /* Convert to QEMU form */
360 memset(&env
->sps
, 0, sizeof(env
->sps
));
363 * XXX This loop should be an entry wide AND of the capabilities that
364 * the selected CPU has with the capabilities that KVM supports.
366 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
367 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
368 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
370 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
374 qsps
->page_shift
= ksps
->page_shift
;
375 qsps
->slb_enc
= ksps
->slb_enc
;
376 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
377 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
378 ksps
->enc
[jk
].page_shift
)) {
381 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
382 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
383 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
387 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
391 env
->slb_nr
= smmu_info
.slb_size
;
392 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
393 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
396 #else /* defined (TARGET_PPC64) */
398 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
402 #endif /* !defined (TARGET_PPC64) */
404 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
406 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
409 int kvm_arch_init_vcpu(CPUState
*cs
)
411 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
412 CPUPPCState
*cenv
= &cpu
->env
;
415 /* Gather server mmu info from KVM and update the CPU state */
416 kvm_fixup_page_sizes(cpu
);
418 /* Synchronize sregs with kvm */
419 ret
= kvm_arch_sync_sregs(cpu
);
424 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
426 /* Some targets support access to KVM's guest TLB. */
427 switch (cenv
->mmu_model
) {
428 case POWERPC_MMU_BOOKE206
:
429 ret
= kvm_booke206_tlb_init(cpu
);
438 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
440 CPUPPCState
*env
= &cpu
->env
;
441 CPUState
*cs
= CPU(cpu
);
442 struct kvm_dirty_tlb dirty_tlb
;
443 unsigned char *bitmap
;
446 if (!env
->kvm_sw_tlb
) {
450 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
451 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
453 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
454 dirty_tlb
.num_dirty
= env
->nb_tlb
;
456 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
458 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
459 __func__
, strerror(-ret
));
465 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
467 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
468 CPUPPCState
*env
= &cpu
->env
;
473 struct kvm_one_reg reg
= {
475 .addr
= (uintptr_t) &val
,
479 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
481 trace_kvm_failed_spr_get(spr
, strerror(errno
));
483 switch (id
& KVM_REG_SIZE_MASK
) {
484 case KVM_REG_SIZE_U32
:
485 env
->spr
[spr
] = val
.u32
;
488 case KVM_REG_SIZE_U64
:
489 env
->spr
[spr
] = val
.u64
;
493 /* Don't handle this size yet */
499 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
501 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
502 CPUPPCState
*env
= &cpu
->env
;
507 struct kvm_one_reg reg
= {
509 .addr
= (uintptr_t) &val
,
513 switch (id
& KVM_REG_SIZE_MASK
) {
514 case KVM_REG_SIZE_U32
:
515 val
.u32
= env
->spr
[spr
];
518 case KVM_REG_SIZE_U64
:
519 val
.u64
= env
->spr
[spr
];
523 /* Don't handle this size yet */
527 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
529 trace_kvm_failed_spr_set(spr
, strerror(errno
));
533 static int kvm_put_fp(CPUState
*cs
)
535 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
536 CPUPPCState
*env
= &cpu
->env
;
537 struct kvm_one_reg reg
;
541 if (env
->insns_flags
& PPC_FLOAT
) {
542 uint64_t fpscr
= env
->fpscr
;
543 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
545 reg
.id
= KVM_REG_PPC_FPSCR
;
546 reg
.addr
= (uintptr_t)&fpscr
;
547 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
549 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
553 for (i
= 0; i
< 32; i
++) {
556 vsr
[0] = float64_val(env
->fpr
[i
]);
557 vsr
[1] = env
->vsr
[i
];
558 reg
.addr
= (uintptr_t) &vsr
;
559 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
561 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
563 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
570 if (env
->insns_flags
& PPC_ALTIVEC
) {
571 reg
.id
= KVM_REG_PPC_VSCR
;
572 reg
.addr
= (uintptr_t)&env
->vscr
;
573 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
575 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
579 for (i
= 0; i
< 32; i
++) {
580 reg
.id
= KVM_REG_PPC_VR(i
);
581 reg
.addr
= (uintptr_t)&env
->avr
[i
];
582 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
584 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
593 static int kvm_get_fp(CPUState
*cs
)
595 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
596 CPUPPCState
*env
= &cpu
->env
;
597 struct kvm_one_reg reg
;
601 if (env
->insns_flags
& PPC_FLOAT
) {
603 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
605 reg
.id
= KVM_REG_PPC_FPSCR
;
606 reg
.addr
= (uintptr_t)&fpscr
;
607 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
609 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
615 for (i
= 0; i
< 32; i
++) {
618 reg
.addr
= (uintptr_t) &vsr
;
619 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
621 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
623 DPRINTF("Unable to get %s%d from KVM: %s\n",
624 vsx
? "VSR" : "FPR", i
, strerror(errno
));
627 env
->fpr
[i
] = vsr
[0];
629 env
->vsr
[i
] = vsr
[1];
635 if (env
->insns_flags
& PPC_ALTIVEC
) {
636 reg
.id
= KVM_REG_PPC_VSCR
;
637 reg
.addr
= (uintptr_t)&env
->vscr
;
638 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
640 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
644 for (i
= 0; i
< 32; i
++) {
645 reg
.id
= KVM_REG_PPC_VR(i
);
646 reg
.addr
= (uintptr_t)&env
->avr
[i
];
647 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
649 DPRINTF("Unable to get VR%d from KVM: %s\n",
659 #if defined(TARGET_PPC64)
660 static int kvm_get_vpa(CPUState
*cs
)
662 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
663 CPUPPCState
*env
= &cpu
->env
;
664 struct kvm_one_reg reg
;
667 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
668 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
669 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
671 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
675 assert((uintptr_t)&env
->slb_shadow_size
676 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
677 reg
.id
= KVM_REG_PPC_VPA_SLB
;
678 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
679 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
681 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
686 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
687 reg
.id
= KVM_REG_PPC_VPA_DTL
;
688 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
689 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
691 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
699 static int kvm_put_vpa(CPUState
*cs
)
701 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
702 CPUPPCState
*env
= &cpu
->env
;
703 struct kvm_one_reg reg
;
706 /* SLB shadow or DTL can't be registered unless a master VPA is
707 * registered. That means when restoring state, if a VPA *is*
708 * registered, we need to set that up first. If not, we need to
709 * deregister the others before deregistering the master VPA */
710 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
713 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
714 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
715 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
717 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
722 assert((uintptr_t)&env
->slb_shadow_size
723 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
724 reg
.id
= KVM_REG_PPC_VPA_SLB
;
725 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
726 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
728 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
732 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
733 reg
.id
= KVM_REG_PPC_VPA_DTL
;
734 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
735 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
737 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
742 if (!env
->vpa_addr
) {
743 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
744 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
745 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
747 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
754 #endif /* TARGET_PPC64 */
756 int kvm_arch_put_registers(CPUState
*cs
, int level
)
758 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
759 CPUPPCState
*env
= &cpu
->env
;
760 struct kvm_regs regs
;
764 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
771 regs
.xer
= cpu_read_xer(env
);
775 regs
.srr0
= env
->spr
[SPR_SRR0
];
776 regs
.srr1
= env
->spr
[SPR_SRR1
];
778 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
779 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
780 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
781 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
782 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
783 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
784 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
785 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
787 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
789 for (i
= 0;i
< 32; i
++)
790 regs
.gpr
[i
] = env
->gpr
[i
];
793 for (i
= 0; i
< 8; i
++) {
794 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
797 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
803 if (env
->tlb_dirty
) {
805 env
->tlb_dirty
= false;
808 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
809 struct kvm_sregs sregs
;
811 sregs
.pvr
= env
->spr
[SPR_PVR
];
813 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
817 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
818 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
819 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
820 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
822 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
827 for (i
= 0; i
< 16; i
++) {
828 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
832 for (i
= 0; i
< 8; i
++) {
833 /* Beware. We have to swap upper and lower bits here */
834 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
836 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
840 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
846 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
847 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
853 /* We deliberately ignore errors here, for kernels which have
854 * the ONE_REG calls, but don't support the specific
855 * registers, there's a reasonable chance things will still
856 * work, at least until we try to migrate. */
857 for (i
= 0; i
< 1024; i
++) {
858 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
861 kvm_put_one_spr(cs
, id
, i
);
867 if (kvm_put_vpa(cs
) < 0) {
868 DPRINTF("Warning: Unable to set VPA information to KVM\n");
872 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
873 #endif /* TARGET_PPC64 */
879 int kvm_arch_get_registers(CPUState
*cs
)
881 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
882 CPUPPCState
*env
= &cpu
->env
;
883 struct kvm_regs regs
;
884 struct kvm_sregs sregs
;
888 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
893 for (i
= 7; i
>= 0; i
--) {
894 env
->crf
[i
] = cr
& 15;
900 cpu_write_xer(env
, regs
.xer
);
904 env
->spr
[SPR_SRR0
] = regs
.srr0
;
905 env
->spr
[SPR_SRR1
] = regs
.srr1
;
907 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
908 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
909 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
910 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
911 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
912 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
913 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
914 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
916 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
918 for (i
= 0;i
< 32; i
++)
919 env
->gpr
[i
] = regs
.gpr
[i
];
923 if (cap_booke_sregs
) {
924 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
929 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
930 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
931 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
932 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
933 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
934 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
935 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
936 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
937 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
938 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
939 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
940 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
943 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
944 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
945 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
946 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
947 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
948 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
951 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
952 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
955 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
956 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
959 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
960 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
961 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
962 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
963 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
964 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
965 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
966 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
967 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
968 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
969 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
970 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
971 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
972 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
973 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
974 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
975 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
977 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
978 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
979 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
980 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
983 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
984 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
987 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
988 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
989 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
993 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
994 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
995 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
996 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
997 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
998 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
999 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1000 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1001 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1002 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1003 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1006 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1007 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1010 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1011 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1012 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1015 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1016 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1017 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1018 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1020 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1021 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1022 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1028 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1033 if (!env
->external_htab
) {
1034 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1040 * The packed SLB array we get from KVM_GET_SREGS only contains
1041 * information about valid entries. So we flush our internal
1042 * copy to get rid of stale ones, then put all valid SLB entries
1045 memset(env
->slb
, 0, sizeof(env
->slb
));
1046 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1047 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1048 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1050 * Only restore valid entries
1052 if (rb
& SLB_ESID_V
) {
1053 ppc_store_slb(env
, rb
, rs
);
1059 for (i
= 0; i
< 16; i
++) {
1060 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1064 for (i
= 0; i
< 8; i
++) {
1065 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1066 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1067 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1068 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1073 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1079 /* We deliberately ignore errors here, for kernels which have
1080 * the ONE_REG calls, but don't support the specific
1081 * registers, there's a reasonable chance things will still
1082 * work, at least until we try to migrate. */
1083 for (i
= 0; i
< 1024; i
++) {
1084 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1087 kvm_get_one_spr(cs
, id
, i
);
1093 if (kvm_get_vpa(cs
) < 0) {
1094 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1098 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1105 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1107 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1109 if (irq
!= PPC_INTERRUPT_EXT
) {
1113 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1117 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1122 #if defined(TARGET_PPCEMB)
1123 #define PPC_INPUT_INT PPC40x_INPUT_INT
1124 #elif defined(TARGET_PPC64)
1125 #define PPC_INPUT_INT PPC970_INPUT_INT
1127 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1130 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1132 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1133 CPUPPCState
*env
= &cpu
->env
;
1137 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1138 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1139 if (!cap_interrupt_level
&&
1140 run
->ready_for_interrupt_injection
&&
1141 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1142 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1144 /* For now KVM disregards the 'irq' argument. However, in the
1145 * future KVM could cache it in-kernel to avoid a heavyweight exit
1146 * when reading the UIC.
1148 irq
= KVM_INTERRUPT_SET
;
1150 DPRINTF("injected interrupt %d\n", irq
);
1151 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1153 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1156 /* Always wake up soon in case the interrupt was level based */
1157 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1158 (get_ticks_per_sec() / 50));
1161 /* We don't know if there are more interrupts pending after this. However,
1162 * the guest will return to userspace in the course of handling this one
1163 * anyways, so we will get a chance to deliver the rest. */
1166 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1170 int kvm_arch_process_async_events(CPUState
*cs
)
1175 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1177 CPUState
*cs
= CPU(cpu
);
1178 CPUPPCState
*env
= &cpu
->env
;
1180 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1182 cs
->exception_index
= EXCP_HLT
;
1188 /* map dcr access to existing qemu dcr emulation */
1189 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1191 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1192 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1197 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1199 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1200 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1205 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1207 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1208 CPUPPCState
*env
= &cpu
->env
;
1211 switch (run
->exit_reason
) {
1213 if (run
->dcr
.is_write
) {
1214 DPRINTF("handle dcr write\n");
1215 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1217 DPRINTF("handle dcr read\n");
1218 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1222 DPRINTF("handle halt\n");
1223 ret
= kvmppc_handle_halt(cpu
);
1225 #if defined(TARGET_PPC64)
1226 case KVM_EXIT_PAPR_HCALL
:
1227 DPRINTF("handle PAPR hypercall\n");
1228 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1230 run
->papr_hcall
.args
);
1235 DPRINTF("handle epr\n");
1236 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1239 case KVM_EXIT_WATCHDOG
:
1240 DPRINTF("handle watchdog expiry\n");
1241 watchdog_perform_action();
1246 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1254 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1256 CPUState
*cs
= CPU(cpu
);
1257 uint32_t bits
= tsr_bits
;
1258 struct kvm_one_reg reg
= {
1259 .id
= KVM_REG_PPC_OR_TSR
,
1260 .addr
= (uintptr_t) &bits
,
1263 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1266 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1269 CPUState
*cs
= CPU(cpu
);
1270 uint32_t bits
= tsr_bits
;
1271 struct kvm_one_reg reg
= {
1272 .id
= KVM_REG_PPC_CLEAR_TSR
,
1273 .addr
= (uintptr_t) &bits
,
1276 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1279 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1281 CPUState
*cs
= CPU(cpu
);
1282 CPUPPCState
*env
= &cpu
->env
;
1283 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1285 struct kvm_one_reg reg
= {
1286 .id
= KVM_REG_PPC_TCR
,
1287 .addr
= (uintptr_t) &tcr
,
1290 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1293 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1295 CPUState
*cs
= CPU(cpu
);
1298 if (!kvm_enabled()) {
1302 if (!cap_ppc_watchdog
) {
1303 printf("warning: KVM does not support watchdog");
1307 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1309 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1310 __func__
, strerror(-ret
));
1317 static int read_cpuinfo(const char *field
, char *value
, int len
)
1321 int field_len
= strlen(field
);
1324 f
= fopen("/proc/cpuinfo", "r");
1330 if(!fgets(line
, sizeof(line
), f
)) {
1333 if (!strncmp(line
, field
, field_len
)) {
1334 pstrcpy(value
, len
, line
);
1345 uint32_t kvmppc_get_tbfreq(void)
1349 uint32_t retval
= get_ticks_per_sec();
1351 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1355 if (!(ns
= strchr(line
, ':'))) {
1365 /* Try to find a device tree node for a CPU with clock-frequency property */
1366 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1368 struct dirent
*dirp
;
1371 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1372 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1377 while ((dirp
= readdir(dp
)) != NULL
) {
1379 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1381 f
= fopen(buf
, "r");
1383 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1390 if (buf
[0] == '\0') {
1391 printf("Unknown host!\n");
1398 /* Read a CPU node property from the host device tree that's a single
1399 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1400 * (can't find or open the property, or doesn't understand the
1402 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1412 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1416 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
1417 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
1419 f
= fopen(buf
, "rb");
1424 len
= fread(&u
, 1, sizeof(u
), f
);
1428 /* property is a 32-bit quantity */
1429 return be32_to_cpu(u
.v32
);
1431 return be64_to_cpu(u
.v64
);
/* Host CPU clock frequency, from the device tree "clock-frequency"
 * property (-1 on failure, per kvmppc_read_int_cpu_dt). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host "ibm,vmx" device-tree property (Altivec/VSX level; -1 when
 * unavailable, truncated to 32 bits). */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host "ibm,dfp" device-tree property (decimal FP support; -1 when
 * unavailable, truncated to 32 bits). */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1452 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1454 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1455 CPUState
*cs
= CPU(cpu
);
1457 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1458 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1465 int kvmppc_get_hasidle(CPUPPCState
*env
)
1467 struct kvm_ppc_pvinfo pvinfo
;
1469 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1470 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1477 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1479 uint32_t *hc
= (uint32_t*)buf
;
1480 struct kvm_ppc_pvinfo pvinfo
;
1482 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1483 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1488 * Fallback to always fail hypercalls:
1504 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1506 CPUState
*cs
= CPU(cpu
);
1509 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
1511 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1514 /* Update the capability flag so we sync the right information
1519 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
1521 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
1524 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1526 CPUState
*cs
= CPU(cpu
);
1529 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
1530 if (ret
&& mpic_proxy
) {
1531 cpu_abort(cs
, "This KVM version does not support EPR\n");
1535 int kvmppc_smt_threads(void)
1537 return cap_ppc_smt
? cap_ppc_smt
: 1;
1541 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1546 struct kvm_allocate_rma ret
;
1547 MemoryRegion
*rma_region
;
1549 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1550 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1551 * not necessary on this hardware
1552 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1554 * FIXME: We should allow the user to force contiguous RMA
1555 * allocation in the cap_ppc_rma==1 case.
1557 if (cap_ppc_rma
< 2) {
1561 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1563 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1568 size
= MIN(ret
.rma_size
, 256ul << 20);
1570 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1571 if (rma
== MAP_FAILED
) {
1572 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1576 rma_region
= g_new(MemoryRegion
, 1);
1577 memory_region_init_ram_ptr(rma_region
, NULL
, name
, size
, rma
);
1578 vmstate_register_ram_global(rma_region
);
1579 memory_region_add_subregion(sysmem
, 0, rma_region
);
1584 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1586 struct kvm_ppc_smmu_info info
;
1587 long rampagesize
, best_page_shift
;
1590 if (cap_ppc_rma
>= 2) {
1591 return current_size
;
1594 /* Find the largest hardware supported page size that's less than
1595 * or equal to the (logical) backing page size of guest RAM */
1596 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1597 rampagesize
= getrampagesize();
1598 best_page_shift
= 0;
1600 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1601 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
1603 if (!sps
->page_shift
) {
1607 if ((sps
->page_shift
> best_page_shift
)
1608 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1609 best_page_shift
= sps
->page_shift
;
1613 return MIN(current_size
,
1614 1ULL << (best_page_shift
+ hash_shift
- 7));
1618 bool kvmppc_spapr_use_multitce(void)
1620 return cap_spapr_multitce
;
1623 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
)
1625 struct kvm_create_spapr_tce args
= {
1627 .window_size
= window_size
,
1633 /* Must set fd to -1 so we don't try to munmap when called for
1634 * destroying the table, which the upper layers -will- do
1637 if (!cap_spapr_tce
) {
1641 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1643 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1648 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
1649 /* FIXME: round this up to page size */
1651 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1652 if (table
== MAP_FAILED
) {
1653 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
/*
 * Tear down a TCE table created by kvmppc_create_spapr_tce():
 * unmap the @nb_table-entry mapping and close its fd.  A negative fd
 * means no kernel table existed.  Returns 0 (on unmap/close failure
 * the table is deliberately leaked).
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
1682 int kvmppc_reset_htab(int shift_hint
)
1684 uint32_t shift
= shift_hint
;
1686 if (!kvm_enabled()) {
1687 /* Full emulation, tell caller to allocate htab itself */
1690 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1692 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1693 if (ret
== -ENOTTY
) {
1694 /* At least some versions of PR KVM advertise the
1695 * capability, but don't implement the ioctl(). Oops.
1696 * Return 0 so that we allocate the htab in qemu, as is
1697 * correct for PR. */
1699 } else if (ret
< 0) {
1705 /* We have a kernel that predates the htab reset calls. For PR
1706 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1707 * this era, it has allocated a 16MB fixed size hash table
1708 * already. Kernels of this era have the GET_PVINFO capability
1709 * only on PR, so we use this hack to determine the right
1711 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1712 /* PR - tell caller to allocate htab */
1715 /* HV - assume 16MB kernel allocated htab */
1720 static inline uint32_t mfpvr(void)
/*
 * Set (@on true) or clear (@on false) the @flags bits in the
 * instruction-set feature mask pointed to by @word.
 * NOTE(review): the body was reconstructed from the call sites in
 * kvmppc_host_cpu_class_init -- confirm against the original file.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
1738 static void kvmppc_host_cpu_initfn(Object
*obj
)
1740 assert(kvm_enabled());
1743 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1745 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1746 uint32_t vmx
= kvmppc_get_vmx();
1747 uint32_t dfp
= kvmppc_get_dfp();
1748 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1749 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1751 /* Now fix up the class with information we can query from the host */
1755 /* Only override when we know what the host supports */
1756 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1757 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1760 /* Only override when we know what the host supports */
1761 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
1764 if (dcache_size
!= -1) {
1765 pcc
->l1_dcache_size
= dcache_size
;
1768 if (icache_size
!= -1) {
1769 pcc
->l1_icache_size
= icache_size
;
1773 bool kvmppc_has_cap_epr(void)
1778 bool kvmppc_has_cap_htab_fd(void)
1783 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
1785 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
1787 while (oc
&& !object_class_is_abstract(oc
)) {
1788 oc
= object_class_get_parent(oc
);
1792 return POWERPC_CPU_CLASS(oc
);
1795 static int kvm_ppc_register_host_cpu_type(void)
1797 TypeInfo type_info
= {
1798 .name
= TYPE_HOST_POWERPC_CPU
,
1799 .instance_init
= kvmppc_host_cpu_initfn
,
1800 .class_init
= kvmppc_host_cpu_class_init
,
1802 uint32_t host_pvr
= mfpvr();
1803 PowerPCCPUClass
*pvr_pcc
;
1806 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1807 if (pvr_pcc
== NULL
) {
1808 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
1810 if (pvr_pcc
== NULL
) {
1813 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1814 type_register(&type_info
);
1816 /* Register generic family CPU class for a family */
1817 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
1818 dc
= DEVICE_CLASS(pvr_pcc
);
1819 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1820 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
1821 type_register(&type_info
);
1826 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
1828 struct kvm_rtas_token_args args
= {
1832 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
1836 strncpy(args
.name
, function
, sizeof(args
.name
));
1838 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
1841 int kvmppc_get_htab_fd(bool write
)
1843 struct kvm_get_htab_fd s
= {
1844 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
1849 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
1853 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
1856 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
1858 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
1859 uint8_t buf
[bufsize
];
1863 rc
= read(fd
, buf
, bufsize
);
1865 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
1869 /* Kernel already retuns data in BE format for the file */
1870 qemu_put_buffer(f
, buf
, rc
);
1874 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
1876 return (rc
== 0) ? 1 : 0;
1879 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
1880 uint16_t n_valid
, uint16_t n_invalid
)
1882 struct kvm_get_htab_header
*buf
;
1883 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
1886 buf
= alloca(chunksize
);
1887 /* This is KVM on ppc, so this is all big-endian */
1889 buf
->n_valid
= n_valid
;
1890 buf
->n_invalid
= n_invalid
;
1892 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
1894 rc
= write(fd
, buf
, chunksize
);
1896 fprintf(stderr
, "Error writing KVM hash table: %s\n",
1900 if (rc
!= chunksize
) {
1901 /* We should never get a short write on a single chunk */
1902 fprintf(stderr
, "Short write, restoring KVM hash table\n");
1908 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
1913 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
1918 int kvm_arch_on_sigbus(int code
, void *addr
)
1923 void kvm_arch_init_irq_routing(KVMState
*s
)
1927 int kvm_arch_insert_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1932 int kvm_arch_remove_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1937 int kvm_arch_insert_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1942 int kvm_arch_remove_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1947 void kvm_arch_remove_all_hw_breakpoints(void)
1951 void kvm_arch_update_guest_debug(CPUState
*cpu
, struct kvm_guest_debug
*dbg
)
1955 struct kvm_get_htab_buf
{
1956 struct kvm_get_htab_header header
;
1958 * We require one extra byte for read
1960 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
1963 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
1966 struct kvm_get_htab_fd ghf
;
1967 struct kvm_get_htab_buf
*hpte_buf
;
1970 ghf
.start_index
= pte_index
;
1971 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
1976 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
1978 * Read the hpte group
1980 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
1985 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
1994 void kvmppc_hash64_free_pteg(uint64_t token
)
1996 struct kvm_get_htab_buf
*htab_buf
;
1998 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2004 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2005 target_ulong pte0
, target_ulong pte1
)
2008 struct kvm_get_htab_fd ghf
;
2009 struct kvm_get_htab_buf hpte_buf
;
2012 ghf
.start_index
= 0; /* Ignored */
2013 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2018 hpte_buf
.header
.n_valid
= 1;
2019 hpte_buf
.header
.n_invalid
= 0;
2020 hpte_buf
.header
.index
= pte_index
;
2021 hpte_buf
.hpte
[0] = pte0
;
2022 hpte_buf
.hpte
[1] = pte1
;
2024 * Write the hpte entry.
2025 * CAUTION: write() has the warn_unused_result attribute. Hence we
2026 * need to check the return value, even though we do nothing.
2028 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {