/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
43 #define DPRINTF(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #define DPRINTF(fmt, ...) \
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
52 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
56 static int cap_interrupt_unset
= false;
57 static int cap_interrupt_level
= false;
58 static int cap_segstate
;
59 static int cap_booke_sregs
;
60 static int cap_ppc_smt
;
61 static int cap_ppc_rma
;
62 static int cap_spapr_tce
;
64 static int cap_one_reg
;
66 static int cap_ppc_watchdog
;
68 static int cap_htab_fd
;
70 /* XXX We have a race condition where we actually have a level triggered
71 * interrupt, but the infrastructure can't expose that yet, so the guest
72 * takes but ignores it, goes to sleep and never gets notified that there's
73 * still an interrupt pending.
75 * As a quick workaround, let's just wake up again 20 ms after we injected
76 * an interrupt. That way we can assure that we're always reinjecting
77 * interrupts in case the guest swallowed them.
79 static QEMUTimer
*idle_timer
;
81 static void kvm_kick_cpu(void *opaque
)
83 PowerPCCPU
*cpu
= opaque
;
85 qemu_cpu_kick(CPU(cpu
));
88 static int kvm_ppc_register_host_cpu_type(void);
90 int kvm_arch_init(KVMState
*s
)
92 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
93 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
94 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
95 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
96 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
97 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
98 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
99 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
100 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
101 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
102 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
103 /* Note: we don't set cap_papr here, because this capability is
104 * only activated after this by kvmppc_set_papr() */
105 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
107 if (!cap_interrupt_level
) {
108 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
109 "VM to stall at times!\n");
112 kvm_ppc_register_host_cpu_type();
117 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
119 CPUPPCState
*cenv
= &cpu
->env
;
120 CPUState
*cs
= CPU(cpu
);
121 struct kvm_sregs sregs
;
124 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
125 /* What we're really trying to say is "if we're on BookE, we use
126 the native PVR for now". This is the only sane way to check
127 it though, so we potentially confuse users that they can run
128 BookE guests on BookS. Let's hope nobody dares enough :) */
132 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
137 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
142 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
143 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
146 /* Set up a shared TLB array with KVM */
147 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
149 CPUPPCState
*env
= &cpu
->env
;
150 CPUState
*cs
= CPU(cpu
);
151 struct kvm_book3e_206_tlb_params params
= {};
152 struct kvm_config_tlb cfg
= {};
153 struct kvm_enable_cap encap
= {};
154 unsigned int entries
= 0;
157 if (!kvm_enabled() ||
158 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
162 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
164 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
165 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
166 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
167 entries
+= params
.tlb_sizes
[i
];
170 assert(entries
== env
->nb_tlb
);
171 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
173 env
->tlb_dirty
= true;
175 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
176 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
177 cfg
.params
= (uintptr_t)¶ms
;
178 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
180 encap
.cap
= KVM_CAP_SW_TLB
;
181 encap
.args
[0] = (uintptr_t)&cfg
;
183 ret
= kvm_vcpu_ioctl(cs
, KVM_ENABLE_CAP
, &encap
);
185 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
186 __func__
, strerror(-ret
));
190 env
->kvm_sw_tlb
= true;
195 #if defined(TARGET_PPC64)
196 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
197 struct kvm_ppc_smmu_info
*info
)
199 CPUPPCState
*env
= &cpu
->env
;
200 CPUState
*cs
= CPU(cpu
);
202 memset(info
, 0, sizeof(*info
));
204 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
205 * need to "guess" what the supported page sizes are.
207 * For that to work we make a few assumptions:
209 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
210 * KVM which only supports 4K and 16M pages, but supports them
211 * regardless of the backing store characteritics. We also don't
212 * support 1T segments.
214 * This is safe as if HV KVM ever supports that capability or PR
215 * KVM grows supports for more page/segment sizes, those versions
216 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
217 * will not hit this fallback
219 * - Else we are running HV KVM. This means we only support page
220 * sizes that fit in the backing store. Additionally we only
221 * advertize 64K pages if the processor is ARCH 2.06 and we assume
222 * P7 encodings for the SLB and hash table. Here too, we assume
223 * support for any newer processor will mean a kernel that
224 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
227 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
232 /* Standard 4k base page size segment */
233 info
->sps
[0].page_shift
= 12;
234 info
->sps
[0].slb_enc
= 0;
235 info
->sps
[0].enc
[0].page_shift
= 12;
236 info
->sps
[0].enc
[0].pte_enc
= 0;
238 /* Standard 16M large page size segment */
239 info
->sps
[1].page_shift
= 24;
240 info
->sps
[1].slb_enc
= SLB_VSID_L
;
241 info
->sps
[1].enc
[0].page_shift
= 24;
242 info
->sps
[1].enc
[0].pte_enc
= 0;
246 /* HV KVM has backing store size restrictions */
247 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
249 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
250 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
253 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
259 /* Standard 4k base page size segment */
260 info
->sps
[i
].page_shift
= 12;
261 info
->sps
[i
].slb_enc
= 0;
262 info
->sps
[i
].enc
[0].page_shift
= 12;
263 info
->sps
[i
].enc
[0].pte_enc
= 0;
266 /* 64K on MMU 2.06 */
267 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
268 info
->sps
[i
].page_shift
= 16;
269 info
->sps
[i
].slb_enc
= 0x110;
270 info
->sps
[i
].enc
[0].page_shift
= 16;
271 info
->sps
[i
].enc
[0].pte_enc
= 1;
275 /* Standard 16M large page size segment */
276 info
->sps
[i
].page_shift
= 24;
277 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
278 info
->sps
[i
].enc
[0].page_shift
= 24;
279 info
->sps
[i
].enc
[0].pte_enc
= 0;
283 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
285 CPUState
*cs
= CPU(cpu
);
288 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
289 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
295 kvm_get_fallback_smmu_info(cpu
, info
);
298 static long getrampagesize(void)
304 /* guest RAM is backed by normal anonymous pages */
305 return getpagesize();
309 ret
= statfs(mem_path
, &fs
);
310 } while (ret
!= 0 && errno
== EINTR
);
313 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
318 #define HUGETLBFS_MAGIC 0x958458f6
320 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
321 /* Explicit mempath, but it's ordinary pages */
322 return getpagesize();
325 /* It's hugepage, return the huge page size */
329 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
331 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
335 return (1ul << shift
) <= rampgsize
;
338 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
340 static struct kvm_ppc_smmu_info smmu_info
;
341 static bool has_smmu_info
;
342 CPUPPCState
*env
= &cpu
->env
;
346 /* We only handle page sizes for 64-bit server guests for now */
347 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
351 /* Collect MMU info from kernel if not already */
352 if (!has_smmu_info
) {
353 kvm_get_smmu_info(cpu
, &smmu_info
);
354 has_smmu_info
= true;
357 rampagesize
= getrampagesize();
359 /* Convert to QEMU form */
360 memset(&env
->sps
, 0, sizeof(env
->sps
));
362 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
363 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
364 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
366 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
370 qsps
->page_shift
= ksps
->page_shift
;
371 qsps
->slb_enc
= ksps
->slb_enc
;
372 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
373 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
374 ksps
->enc
[jk
].page_shift
)) {
377 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
378 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
379 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
383 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
387 env
->slb_nr
= smmu_info
.slb_size
;
388 if (smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
) {
389 env
->mmu_model
|= POWERPC_MMU_1TSEG
;
391 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
394 #else /* defined (TARGET_PPC64) */
396 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
400 #endif /* !defined (TARGET_PPC64) */
402 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
404 return cpu
->cpu_index
;
407 int kvm_arch_init_vcpu(CPUState
*cs
)
409 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
410 CPUPPCState
*cenv
= &cpu
->env
;
413 /* Gather server mmu info from KVM and update the CPU state */
414 kvm_fixup_page_sizes(cpu
);
416 /* Synchronize sregs with kvm */
417 ret
= kvm_arch_sync_sregs(cpu
);
422 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
424 /* Some targets support access to KVM's guest TLB. */
425 switch (cenv
->mmu_model
) {
426 case POWERPC_MMU_BOOKE206
:
427 ret
= kvm_booke206_tlb_init(cpu
);
436 void kvm_arch_reset_vcpu(CPUState
*cpu
)
440 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
442 CPUPPCState
*env
= &cpu
->env
;
443 CPUState
*cs
= CPU(cpu
);
444 struct kvm_dirty_tlb dirty_tlb
;
445 unsigned char *bitmap
;
448 if (!env
->kvm_sw_tlb
) {
452 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
453 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
455 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
456 dirty_tlb
.num_dirty
= env
->nb_tlb
;
458 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
460 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
461 __func__
, strerror(-ret
));
467 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
469 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
470 CPUPPCState
*env
= &cpu
->env
;
475 struct kvm_one_reg reg
= {
477 .addr
= (uintptr_t) &val
,
481 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
483 fprintf(stderr
, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
484 spr
, strerror(errno
));
486 switch (id
& KVM_REG_SIZE_MASK
) {
487 case KVM_REG_SIZE_U32
:
488 env
->spr
[spr
] = val
.u32
;
491 case KVM_REG_SIZE_U64
:
492 env
->spr
[spr
] = val
.u64
;
496 /* Don't handle this size yet */
502 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
504 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
505 CPUPPCState
*env
= &cpu
->env
;
510 struct kvm_one_reg reg
= {
512 .addr
= (uintptr_t) &val
,
516 switch (id
& KVM_REG_SIZE_MASK
) {
517 case KVM_REG_SIZE_U32
:
518 val
.u32
= env
->spr
[spr
];
521 case KVM_REG_SIZE_U64
:
522 val
.u64
= env
->spr
[spr
];
526 /* Don't handle this size yet */
530 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
532 fprintf(stderr
, "Warning: Unable to set SPR %d to KVM: %s\n",
533 spr
, strerror(errno
));
537 static int kvm_put_fp(CPUState
*cs
)
539 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
540 CPUPPCState
*env
= &cpu
->env
;
541 struct kvm_one_reg reg
;
545 if (env
->insns_flags
& PPC_FLOAT
) {
546 uint64_t fpscr
= env
->fpscr
;
547 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
549 reg
.id
= KVM_REG_PPC_FPSCR
;
550 reg
.addr
= (uintptr_t)&fpscr
;
551 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
553 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
557 for (i
= 0; i
< 32; i
++) {
560 vsr
[0] = float64_val(env
->fpr
[i
]);
561 vsr
[1] = env
->vsr
[i
];
562 reg
.addr
= (uintptr_t) &vsr
;
563 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
565 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
567 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
574 if (env
->insns_flags
& PPC_ALTIVEC
) {
575 reg
.id
= KVM_REG_PPC_VSCR
;
576 reg
.addr
= (uintptr_t)&env
->vscr
;
577 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
579 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
583 for (i
= 0; i
< 32; i
++) {
584 reg
.id
= KVM_REG_PPC_VR(i
);
585 reg
.addr
= (uintptr_t)&env
->avr
[i
];
586 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
588 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
597 static int kvm_get_fp(CPUState
*cs
)
599 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
600 CPUPPCState
*env
= &cpu
->env
;
601 struct kvm_one_reg reg
;
605 if (env
->insns_flags
& PPC_FLOAT
) {
607 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
609 reg
.id
= KVM_REG_PPC_FPSCR
;
610 reg
.addr
= (uintptr_t)&fpscr
;
611 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
613 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
619 for (i
= 0; i
< 32; i
++) {
622 reg
.addr
= (uintptr_t) &vsr
;
623 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
625 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
627 DPRINTF("Unable to get %s%d from KVM: %s\n",
628 vsx
? "VSR" : "FPR", i
, strerror(errno
));
631 env
->fpr
[i
] = vsr
[0];
633 env
->vsr
[i
] = vsr
[1];
639 if (env
->insns_flags
& PPC_ALTIVEC
) {
640 reg
.id
= KVM_REG_PPC_VSCR
;
641 reg
.addr
= (uintptr_t)&env
->vscr
;
642 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
644 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
648 for (i
= 0; i
< 32; i
++) {
649 reg
.id
= KVM_REG_PPC_VR(i
);
650 reg
.addr
= (uintptr_t)&env
->avr
[i
];
651 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
653 DPRINTF("Unable to get VR%d from KVM: %s\n",
663 #if defined(TARGET_PPC64)
664 static int kvm_get_vpa(CPUState
*cs
)
666 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
667 CPUPPCState
*env
= &cpu
->env
;
668 struct kvm_one_reg reg
;
671 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
672 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
673 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
675 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
679 assert((uintptr_t)&env
->slb_shadow_size
680 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
681 reg
.id
= KVM_REG_PPC_VPA_SLB
;
682 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
683 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
685 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
690 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
691 reg
.id
= KVM_REG_PPC_VPA_DTL
;
692 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
693 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
695 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
703 static int kvm_put_vpa(CPUState
*cs
)
705 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
706 CPUPPCState
*env
= &cpu
->env
;
707 struct kvm_one_reg reg
;
710 /* SLB shadow or DTL can't be registered unless a master VPA is
711 * registered. That means when restoring state, if a VPA *is*
712 * registered, we need to set that up first. If not, we need to
713 * deregister the others before deregistering the master VPA */
714 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
717 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
718 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
719 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
721 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
726 assert((uintptr_t)&env
->slb_shadow_size
727 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
728 reg
.id
= KVM_REG_PPC_VPA_SLB
;
729 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
730 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
732 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
736 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
737 reg
.id
= KVM_REG_PPC_VPA_DTL
;
738 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
739 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
741 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
746 if (!env
->vpa_addr
) {
747 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
748 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
749 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
751 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
758 #endif /* TARGET_PPC64 */
760 int kvm_arch_put_registers(CPUState
*cs
, int level
)
762 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
763 CPUPPCState
*env
= &cpu
->env
;
764 struct kvm_regs regs
;
768 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
775 regs
.xer
= cpu_read_xer(env
);
779 regs
.srr0
= env
->spr
[SPR_SRR0
];
780 regs
.srr1
= env
->spr
[SPR_SRR1
];
782 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
783 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
784 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
785 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
786 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
787 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
788 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
789 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
791 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
793 for (i
= 0;i
< 32; i
++)
794 regs
.gpr
[i
] = env
->gpr
[i
];
797 for (i
= 0; i
< 8; i
++) {
798 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
801 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
807 if (env
->tlb_dirty
) {
809 env
->tlb_dirty
= false;
812 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
813 struct kvm_sregs sregs
;
815 sregs
.pvr
= env
->spr
[SPR_PVR
];
817 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
821 for (i
= 0; i
< 64; i
++) {
822 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
823 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
828 for (i
= 0; i
< 16; i
++) {
829 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
833 for (i
= 0; i
< 8; i
++) {
834 /* Beware. We have to swap upper and lower bits here */
835 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
837 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
841 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
847 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
848 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
854 /* We deliberately ignore errors here, for kernels which have
855 * the ONE_REG calls, but don't support the specific
856 * registers, there's a reasonable chance things will still
857 * work, at least until we try to migrate. */
858 for (i
= 0; i
< 1024; i
++) {
859 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
862 kvm_put_one_spr(cs
, id
, i
);
868 if (kvm_put_vpa(cs
) < 0) {
869 DPRINTF("Warning: Unable to set VPA information to KVM\n");
872 #endif /* TARGET_PPC64 */
878 int kvm_arch_get_registers(CPUState
*cs
)
880 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
881 CPUPPCState
*env
= &cpu
->env
;
882 struct kvm_regs regs
;
883 struct kvm_sregs sregs
;
887 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
892 for (i
= 7; i
>= 0; i
--) {
893 env
->crf
[i
] = cr
& 15;
899 cpu_write_xer(env
, regs
.xer
);
903 env
->spr
[SPR_SRR0
] = regs
.srr0
;
904 env
->spr
[SPR_SRR1
] = regs
.srr1
;
906 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
907 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
908 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
909 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
910 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
911 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
912 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
913 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
915 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
917 for (i
= 0;i
< 32; i
++)
918 env
->gpr
[i
] = regs
.gpr
[i
];
922 if (cap_booke_sregs
) {
923 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
928 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
929 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
930 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
931 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
932 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
933 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
934 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
935 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
936 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
937 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
938 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
939 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
942 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
943 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
944 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
945 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
946 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
947 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
950 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
951 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
954 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
955 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
958 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
959 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
960 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
961 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
962 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
963 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
964 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
965 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
966 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
967 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
968 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
969 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
970 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
971 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
972 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
973 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
974 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
976 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
977 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
978 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
979 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
982 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
983 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
986 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
987 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
988 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
992 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
993 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
994 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
995 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
996 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
997 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
998 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
999 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1000 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1001 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1002 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1005 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1006 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1009 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1010 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1011 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1014 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1015 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1016 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1017 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1019 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1020 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1021 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1027 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1032 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1036 for (i
= 0; i
< 64; i
++) {
1037 ppc_store_slb(env
, sregs
.u
.s
.ppc64
.slb
[i
].slbe
,
1038 sregs
.u
.s
.ppc64
.slb
[i
].slbv
);
1043 for (i
= 0; i
< 16; i
++) {
1044 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1048 for (i
= 0; i
< 8; i
++) {
1049 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1050 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1051 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1052 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1057 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1063 /* We deliberately ignore errors here, for kernels which have
1064 * the ONE_REG calls, but don't support the specific
1065 * registers, there's a reasonable chance things will still
1066 * work, at least until we try to migrate. */
1067 for (i
= 0; i
< 1024; i
++) {
1068 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1071 kvm_get_one_spr(cs
, id
, i
);
1077 if (kvm_get_vpa(cs
) < 0) {
1078 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1087 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1089 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1091 if (irq
!= PPC_INTERRUPT_EXT
) {
1095 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1099 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1104 #if defined(TARGET_PPCEMB)
1105 #define PPC_INPUT_INT PPC40x_INPUT_INT
1106 #elif defined(TARGET_PPC64)
1107 #define PPC_INPUT_INT PPC970_INPUT_INT
1109 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1112 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1114 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1115 CPUPPCState
*env
= &cpu
->env
;
1119 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1120 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1121 if (!cap_interrupt_level
&&
1122 run
->ready_for_interrupt_injection
&&
1123 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1124 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1126 /* For now KVM disregards the 'irq' argument. However, in the
1127 * future KVM could cache it in-kernel to avoid a heavyweight exit
1128 * when reading the UIC.
1130 irq
= KVM_INTERRUPT_SET
;
1132 DPRINTF("injected interrupt %d\n", irq
);
1133 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1135 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1138 /* Always wake up soon in case the interrupt was level based */
1139 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1140 (get_ticks_per_sec() / 50));
1143 /* We don't know if there are more interrupts pending after this. However,
1144 * the guest will return to userspace in the course of handling this one
1145 * anyways, so we will get a chance to deliver the rest. */
1148 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1152 int kvm_arch_process_async_events(CPUState
*cs
)
1157 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1159 CPUState
*cs
= CPU(cpu
);
1160 CPUPPCState
*env
= &cpu
->env
;
1162 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1164 env
->exception_index
= EXCP_HLT
;
1170 /* map dcr access to existing qemu dcr emulation */
1171 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1173 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1174 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1179 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1181 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1182 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1187 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1189 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1190 CPUPPCState
*env
= &cpu
->env
;
1193 switch (run
->exit_reason
) {
1195 if (run
->dcr
.is_write
) {
1196 DPRINTF("handle dcr write\n");
1197 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1199 DPRINTF("handle dcr read\n");
1200 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1204 DPRINTF("handle halt\n");
1205 ret
= kvmppc_handle_halt(cpu
);
1207 #if defined(TARGET_PPC64)
1208 case KVM_EXIT_PAPR_HCALL
:
1209 DPRINTF("handle PAPR hypercall\n");
1210 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1212 run
->papr_hcall
.args
);
1217 DPRINTF("handle epr\n");
1218 run
->epr
.epr
= ldl_phys(env
->mpic_iack
);
1221 case KVM_EXIT_WATCHDOG
:
1222 DPRINTF("handle watchdog expiry\n");
1223 watchdog_perform_action();
1228 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1236 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1238 CPUState
*cs
= CPU(cpu
);
1239 uint32_t bits
= tsr_bits
;
1240 struct kvm_one_reg reg
= {
1241 .id
= KVM_REG_PPC_OR_TSR
,
1242 .addr
= (uintptr_t) &bits
,
1245 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1248 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1251 CPUState
*cs
= CPU(cpu
);
1252 uint32_t bits
= tsr_bits
;
1253 struct kvm_one_reg reg
= {
1254 .id
= KVM_REG_PPC_CLEAR_TSR
,
1255 .addr
= (uintptr_t) &bits
,
1258 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1261 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1263 CPUState
*cs
= CPU(cpu
);
1264 CPUPPCState
*env
= &cpu
->env
;
1265 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1267 struct kvm_one_reg reg
= {
1268 .id
= KVM_REG_PPC_TCR
,
1269 .addr
= (uintptr_t) &tcr
,
1272 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1275 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1277 CPUState
*cs
= CPU(cpu
);
1278 struct kvm_enable_cap encap
= {};
1281 if (!kvm_enabled()) {
1285 if (!cap_ppc_watchdog
) {
1286 printf("warning: KVM does not support watchdog");
1290 encap
.cap
= KVM_CAP_PPC_BOOKE_WATCHDOG
;
1291 ret
= kvm_vcpu_ioctl(cs
, KVM_ENABLE_CAP
, &encap
);
1293 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1294 __func__
, strerror(-ret
));
1301 static int read_cpuinfo(const char *field
, char *value
, int len
)
1305 int field_len
= strlen(field
);
1308 f
= fopen("/proc/cpuinfo", "r");
1314 if(!fgets(line
, sizeof(line
), f
)) {
1317 if (!strncmp(line
, field
, field_len
)) {
1318 pstrcpy(value
, len
, line
);
1329 uint32_t kvmppc_get_tbfreq(void)
1333 uint32_t retval
= get_ticks_per_sec();
1335 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1339 if (!(ns
= strchr(line
, ':'))) {
1349 /* Try to find a device tree node for a CPU with clock-frequency property */
1350 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1352 struct dirent
*dirp
;
1355 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1356 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1361 while ((dirp
= readdir(dp
)) != NULL
) {
1363 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1365 f
= fopen(buf
, "r");
1367 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1374 if (buf
[0] == '\0') {
1375 printf("Unknown host!\n");
1382 /* Read a CPU node property from the host device tree that's a single
1383 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1384 * (can't find or open the property, or doesn't understand the
1386 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1396 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1400 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
1401 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
1403 f
= fopen(buf
, "rb");
1408 len
= fread(&u
, 1, sizeof(u
), f
);
1412 /* property is a 32-bit quantity */
1413 return be32_to_cpu(u
.v32
);
1415 return be64_to_cpu(u
.v64
);
/* Host CPU clock frequency, read from the device tree. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host VMX/Altivec capability level from the device tree "ibm,vmx" node. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
/* Host decimal floating point capability from the "ibm,dfp" node. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
1436 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1438 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1439 CPUState
*cs
= CPU(cpu
);
1441 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1442 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1449 int kvmppc_get_hasidle(CPUPPCState
*env
)
1451 struct kvm_ppc_pvinfo pvinfo
;
1453 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1454 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1461 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1463 uint32_t *hc
= (uint32_t*)buf
;
1464 struct kvm_ppc_pvinfo pvinfo
;
1466 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1467 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1472 * Fallback to always fail hypercalls:
1488 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1490 CPUPPCState
*env
= &cpu
->env
;
1491 CPUState
*cs
= CPU(cpu
);
1492 struct kvm_enable_cap cap
= {};
1495 cap
.cap
= KVM_CAP_PPC_PAPR
;
1496 ret
= kvm_vcpu_ioctl(cs
, KVM_ENABLE_CAP
, &cap
);
1499 cpu_abort(env
, "This KVM version does not support PAPR\n");
1502 /* Update the capability flag so we sync the right information
1507 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1509 CPUPPCState
*env
= &cpu
->env
;
1510 CPUState
*cs
= CPU(cpu
);
1511 struct kvm_enable_cap cap
= {};
1514 cap
.cap
= KVM_CAP_PPC_EPR
;
1515 cap
.args
[0] = mpic_proxy
;
1516 ret
= kvm_vcpu_ioctl(cs
, KVM_ENABLE_CAP
, &cap
);
1518 if (ret
&& mpic_proxy
) {
1519 cpu_abort(env
, "This KVM version does not support EPR\n");
1523 int kvmppc_smt_threads(void)
1525 return cap_ppc_smt
? cap_ppc_smt
: 1;
1529 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1534 struct kvm_allocate_rma ret
;
1535 MemoryRegion
*rma_region
;
1537 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1538 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1539 * not necessary on this hardware
1540 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1542 * FIXME: We should allow the user to force contiguous RMA
1543 * allocation in the cap_ppc_rma==1 case.
1545 if (cap_ppc_rma
< 2) {
1549 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1551 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1556 size
= MIN(ret
.rma_size
, 256ul << 20);
1558 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1559 if (rma
== MAP_FAILED
) {
1560 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1564 rma_region
= g_new(MemoryRegion
, 1);
1565 memory_region_init_ram_ptr(rma_region
, NULL
, name
, size
, rma
);
1566 vmstate_register_ram_global(rma_region
);
1567 memory_region_add_subregion(sysmem
, 0, rma_region
);
1572 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1574 struct kvm_ppc_smmu_info info
;
1575 long rampagesize
, best_page_shift
;
1578 if (cap_ppc_rma
>= 2) {
1579 return current_size
;
1582 /* Find the largest hardware supported page size that's less than
1583 * or equal to the (logical) backing page size of guest RAM */
1584 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1585 rampagesize
= getrampagesize();
1586 best_page_shift
= 0;
1588 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1589 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
1591 if (!sps
->page_shift
) {
1595 if ((sps
->page_shift
> best_page_shift
)
1596 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1597 best_page_shift
= sps
->page_shift
;
1601 return MIN(current_size
,
1602 1ULL << (best_page_shift
+ hash_shift
- 7));
1606 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
)
1608 struct kvm_create_spapr_tce args
= {
1610 .window_size
= window_size
,
1616 /* Must set fd to -1 so we don't try to munmap when called for
1617 * destroying the table, which the upper layers -will- do
1620 if (!cap_spapr_tce
) {
1624 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1626 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1631 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
1632 /* FIXME: round this up to page size */
1634 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1635 if (table
== MAP_FAILED
) {
1636 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
1646 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t window_size
)
1654 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
)*sizeof(uint64_t);
1655 if ((munmap(table
, len
) < 0) ||
1657 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1659 /* Leak the table */
1665 int kvmppc_reset_htab(int shift_hint
)
1667 uint32_t shift
= shift_hint
;
1669 if (!kvm_enabled()) {
1670 /* Full emulation, tell caller to allocate htab itself */
1673 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1675 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1676 if (ret
== -ENOTTY
) {
1677 /* At least some versions of PR KVM advertise the
1678 * capability, but don't implement the ioctl(). Oops.
1679 * Return 0 so that we allocate the htab in qemu, as is
1680 * correct for PR. */
1682 } else if (ret
< 0) {
1688 /* We have a kernel that predates the htab reset calls. For PR
1689 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1690 * this era, it has allocated a 16MB fixed size hash table
1691 * already. Kernels of this era have the GET_PVINFO capability
1692 * only on PR, so we use this hack to determine the right
1694 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1695 /* PR - tell caller to allocate htab */
1698 /* HV - assume 16MB kernel allocated htab */
1703 static inline uint32_t mfpvr(void)
/* Set (@on true) or clear (@on false) the @flags bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
1721 static void kvmppc_host_cpu_initfn(Object
*obj
)
1723 assert(kvm_enabled());
1726 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1728 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1729 uint32_t vmx
= kvmppc_get_vmx();
1730 uint32_t dfp
= kvmppc_get_dfp();
1731 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1732 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1734 /* Now fix up the class with information we can query from the host */
1737 /* Only override when we know what the host supports */
1738 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1739 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1742 /* Only override when we know what the host supports */
1743 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
1746 if (dcache_size
!= -1) {
1747 pcc
->l1_dcache_size
= dcache_size
;
1750 if (icache_size
!= -1) {
1751 pcc
->l1_icache_size
= icache_size
;
1755 int kvmppc_fixup_cpu(PowerPCCPU
*cpu
)
1757 CPUState
*cs
= CPU(cpu
);
1760 /* Adjust cpu index for SMT */
1761 smt
= kvmppc_smt_threads();
1762 cs
->cpu_index
= (cs
->cpu_index
/ smp_threads
) * smt
1763 + (cs
->cpu_index
% smp_threads
);
1768 bool kvmppc_has_cap_epr(void)
1773 static int kvm_ppc_register_host_cpu_type(void)
1775 TypeInfo type_info
= {
1776 .name
= TYPE_HOST_POWERPC_CPU
,
1777 .instance_init
= kvmppc_host_cpu_initfn
,
1778 .class_init
= kvmppc_host_cpu_class_init
,
1780 uint32_t host_pvr
= mfpvr();
1781 PowerPCCPUClass
*pvr_pcc
;
1783 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1784 if (pvr_pcc
== NULL
) {
1787 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1788 type_register(&type_info
);
1793 int kvmppc_get_htab_fd(bool write
)
1795 struct kvm_get_htab_fd s
= {
1796 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
1801 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
1805 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
1808 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
1810 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
1811 uint8_t buf
[bufsize
];
1815 rc
= read(fd
, buf
, bufsize
);
1817 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
1821 /* Kernel already retuns data in BE format for the file */
1822 qemu_put_buffer(f
, buf
, rc
);
1826 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
1828 return (rc
== 0) ? 1 : 0;
1831 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
1832 uint16_t n_valid
, uint16_t n_invalid
)
1834 struct kvm_get_htab_header
*buf
;
1835 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
1838 buf
= alloca(chunksize
);
1839 /* This is KVM on ppc, so this is all big-endian */
1841 buf
->n_valid
= n_valid
;
1842 buf
->n_invalid
= n_invalid
;
1844 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
1846 rc
= write(fd
, buf
, chunksize
);
1848 fprintf(stderr
, "Error writing KVM hash table: %s\n",
1852 if (rc
!= chunksize
) {
1853 /* We should never get a short write on a single chunk */
1854 fprintf(stderr
, "Short write, restoring KVM hash table\n");
1860 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
1865 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
/* KVM core hook for SIGBUS outside vcpu context; unhandled on ppc.
 * NOTE(review): body not visible in this extraction; reconstructed from
 * the upstream version of this file — confirm against the repository. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
1875 void kvm_arch_init_irq_routing(KVMState
*s
)