2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
41 #include "exec/gdbstub.h"
46 #define DPRINTF(fmt, ...) \
47 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
49 #define DPRINTF(fmt, ...) \
53 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
55 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
59 static int cap_interrupt_unset
= false;
60 static int cap_interrupt_level
= false;
61 static int cap_segstate
;
62 static int cap_booke_sregs
;
63 static int cap_ppc_smt
;
64 static int cap_ppc_rma
;
65 static int cap_spapr_tce
;
66 static int cap_spapr_multitce
;
67 static int cap_spapr_vfio
;
69 static int cap_one_reg
;
71 static int cap_ppc_watchdog
;
73 static int cap_htab_fd
;
74 static int cap_fixup_hcalls
;
76 static uint32_t debug_inst_opcode
;
78 /* XXX We have a race condition where we actually have a level triggered
79 * interrupt, but the infrastructure can't expose that yet, so the guest
80 * takes but ignores it, goes to sleep and never gets notified that there's
81 * still an interrupt pending.
83 * As a quick workaround, let's just wake up again 20 ms after we injected
84 * an interrupt. That way we can ensure that we're always reinjecting
85 * interrupts in case the guest swallowed them.
87 static QEMUTimer
*idle_timer
;
89 static void kvm_kick_cpu(void *opaque
)
91 PowerPCCPU
*cpu
= opaque
;
93 qemu_cpu_kick(CPU(cpu
));
96 static int kvm_ppc_register_host_cpu_type(void);
98 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
100 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
101 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
102 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
103 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
104 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
105 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
106 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
107 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
108 cap_spapr_vfio
= false;
109 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
110 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
111 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
112 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
113 /* Note: we don't set cap_papr here, because this capability is
114 * only activated after this by kvmppc_set_papr() */
115 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
116 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
118 if (!cap_interrupt_level
) {
119 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
120 "VM to stall at times!\n");
123 kvm_ppc_register_host_cpu_type();
128 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
130 CPUPPCState
*cenv
= &cpu
->env
;
131 CPUState
*cs
= CPU(cpu
);
132 struct kvm_sregs sregs
;
135 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
136 /* What we're really trying to say is "if we're on BookE, we use
137 the native PVR for now". This is the only sane way to check
138 it though, so we potentially mislead users into thinking they can
139 run BookE guests on BookS. Let's hope nobody dares to try :) */
143 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
148 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
153 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
154 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
157 /* Set up a shared TLB array with KVM */
158 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
160 CPUPPCState
*env
= &cpu
->env
;
161 CPUState
*cs
= CPU(cpu
);
162 struct kvm_book3e_206_tlb_params params
= {};
163 struct kvm_config_tlb cfg
= {};
164 unsigned int entries
= 0;
167 if (!kvm_enabled() ||
168 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
172 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
174 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
175 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
176 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
177 entries
+= params
.tlb_sizes
[i
];
180 assert(entries
== env
->nb_tlb
);
181 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
183 env
->tlb_dirty
= true;
185 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
186 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
187 cfg
.params
= (uintptr_t)¶ms
;
188 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
190 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
192 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
193 __func__
, strerror(-ret
));
197 env
->kvm_sw_tlb
= true;
202 #if defined(TARGET_PPC64)
203 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
204 struct kvm_ppc_smmu_info
*info
)
206 CPUPPCState
*env
= &cpu
->env
;
207 CPUState
*cs
= CPU(cpu
);
209 memset(info
, 0, sizeof(*info
));
211 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
212 * need to "guess" what the supported page sizes are.
214 * For that to work we make a few assumptions:
216 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
217 * KVM which only supports 4K and 16M pages, but supports them
218 * regardless of the backing store characteristics. We also don't
219 * support 1T segments.
221 * This is safe because if HV KVM ever supports that capability or PR
222 * KVM grows support for more page/segment sizes, those versions
223 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
224 * will not hit this fallback
226 * - Else we are running HV KVM. This means we only support page
227 * sizes that fit in the backing store. Additionally we only
228 * advertise 64K pages if the processor is ARCH 2.06 and we assume
229 * P7 encodings for the SLB and hash table. Here too, we assume
230 * support for any newer processor will mean a kernel that
231 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
234 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
239 /* Standard 4k base page size segment */
240 info
->sps
[0].page_shift
= 12;
241 info
->sps
[0].slb_enc
= 0;
242 info
->sps
[0].enc
[0].page_shift
= 12;
243 info
->sps
[0].enc
[0].pte_enc
= 0;
245 /* Standard 16M large page size segment */
246 info
->sps
[1].page_shift
= 24;
247 info
->sps
[1].slb_enc
= SLB_VSID_L
;
248 info
->sps
[1].enc
[0].page_shift
= 24;
249 info
->sps
[1].enc
[0].pte_enc
= 0;
253 /* HV KVM has backing store size restrictions */
254 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
256 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
257 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
260 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
266 /* Standard 4k base page size segment */
267 info
->sps
[i
].page_shift
= 12;
268 info
->sps
[i
].slb_enc
= 0;
269 info
->sps
[i
].enc
[0].page_shift
= 12;
270 info
->sps
[i
].enc
[0].pte_enc
= 0;
273 /* 64K on MMU 2.06 */
274 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
275 info
->sps
[i
].page_shift
= 16;
276 info
->sps
[i
].slb_enc
= 0x110;
277 info
->sps
[i
].enc
[0].page_shift
= 16;
278 info
->sps
[i
].enc
[0].pte_enc
= 1;
282 /* Standard 16M large page size segment */
283 info
->sps
[i
].page_shift
= 24;
284 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
285 info
->sps
[i
].enc
[0].page_shift
= 24;
286 info
->sps
[i
].enc
[0].pte_enc
= 0;
290 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
292 CPUState
*cs
= CPU(cpu
);
295 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
296 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
302 kvm_get_fallback_smmu_info(cpu
, info
);
305 static long getrampagesize(void)
311 /* guest RAM is backed by normal anonymous pages */
312 return getpagesize();
316 ret
= statfs(mem_path
, &fs
);
317 } while (ret
!= 0 && errno
== EINTR
);
320 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
325 #define HUGETLBFS_MAGIC 0x958458f6
327 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
328 /* Explicit mempath, but it's ordinary pages */
329 return getpagesize();
332 /* It's hugepage, return the huge page size */
336 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
338 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
342 return (1ul << shift
) <= rampgsize
;
345 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
347 static struct kvm_ppc_smmu_info smmu_info
;
348 static bool has_smmu_info
;
349 CPUPPCState
*env
= &cpu
->env
;
353 /* We only handle page sizes for 64-bit server guests for now */
354 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
358 /* Collect MMU info from kernel if not already */
359 if (!has_smmu_info
) {
360 kvm_get_smmu_info(cpu
, &smmu_info
);
361 has_smmu_info
= true;
364 rampagesize
= getrampagesize();
366 /* Convert to QEMU form */
367 memset(&env
->sps
, 0, sizeof(env
->sps
));
370 * XXX This loop should be an entry wide AND of the capabilities that
371 * the selected CPU has with the capabilities that KVM supports.
373 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
374 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
375 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
377 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
381 qsps
->page_shift
= ksps
->page_shift
;
382 qsps
->slb_enc
= ksps
->slb_enc
;
383 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
384 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
385 ksps
->enc
[jk
].page_shift
)) {
388 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
389 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
390 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
394 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
398 env
->slb_nr
= smmu_info
.slb_size
;
399 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
400 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
403 #else /* defined (TARGET_PPC64) */
405 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
409 #endif /* !defined (TARGET_PPC64) */
411 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
413 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
416 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
417 * book3s supports only 1 watchpoint, so an array size
418 * of 4 is sufficient for now.
420 #define MAX_HW_BKPTS 4
422 static struct HWBreakpoint
{
425 } hw_debug_points
[MAX_HW_BKPTS
];
427 static CPUWatchpoint hw_watchpoint
;
429 /* By default no breakpoints or watchpoints are supported */
430 static int max_hw_breakpoint
;
431 static int max_hw_watchpoint
;
432 static int nb_hw_breakpoint
;
433 static int nb_hw_watchpoint
;
435 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
437 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
438 max_hw_breakpoint
= 2;
439 max_hw_watchpoint
= 2;
442 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
443 fprintf(stderr
, "Error initializing h/w breakpoints\n");
448 int kvm_arch_init_vcpu(CPUState
*cs
)
450 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
451 CPUPPCState
*cenv
= &cpu
->env
;
454 /* Gather server mmu info from KVM and update the CPU state */
455 kvm_fixup_page_sizes(cpu
);
457 /* Synchronize sregs with kvm */
458 ret
= kvm_arch_sync_sregs(cpu
);
463 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
465 /* Some targets support access to KVM's guest TLB. */
466 switch (cenv
->mmu_model
) {
467 case POWERPC_MMU_BOOKE206
:
468 ret
= kvm_booke206_tlb_init(cpu
);
474 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
475 kvmppc_hw_debug_points_init(cenv
);
480 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
482 CPUPPCState
*env
= &cpu
->env
;
483 CPUState
*cs
= CPU(cpu
);
484 struct kvm_dirty_tlb dirty_tlb
;
485 unsigned char *bitmap
;
488 if (!env
->kvm_sw_tlb
) {
492 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
493 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
495 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
496 dirty_tlb
.num_dirty
= env
->nb_tlb
;
498 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
500 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
501 __func__
, strerror(-ret
));
507 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
509 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
510 CPUPPCState
*env
= &cpu
->env
;
515 struct kvm_one_reg reg
= {
517 .addr
= (uintptr_t) &val
,
521 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
523 trace_kvm_failed_spr_get(spr
, strerror(errno
));
525 switch (id
& KVM_REG_SIZE_MASK
) {
526 case KVM_REG_SIZE_U32
:
527 env
->spr
[spr
] = val
.u32
;
530 case KVM_REG_SIZE_U64
:
531 env
->spr
[spr
] = val
.u64
;
535 /* Don't handle this size yet */
541 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
543 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
544 CPUPPCState
*env
= &cpu
->env
;
549 struct kvm_one_reg reg
= {
551 .addr
= (uintptr_t) &val
,
555 switch (id
& KVM_REG_SIZE_MASK
) {
556 case KVM_REG_SIZE_U32
:
557 val
.u32
= env
->spr
[spr
];
560 case KVM_REG_SIZE_U64
:
561 val
.u64
= env
->spr
[spr
];
565 /* Don't handle this size yet */
569 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
571 trace_kvm_failed_spr_set(spr
, strerror(errno
));
575 static int kvm_put_fp(CPUState
*cs
)
577 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
578 CPUPPCState
*env
= &cpu
->env
;
579 struct kvm_one_reg reg
;
583 if (env
->insns_flags
& PPC_FLOAT
) {
584 uint64_t fpscr
= env
->fpscr
;
585 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
587 reg
.id
= KVM_REG_PPC_FPSCR
;
588 reg
.addr
= (uintptr_t)&fpscr
;
589 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
591 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
595 for (i
= 0; i
< 32; i
++) {
598 vsr
[0] = float64_val(env
->fpr
[i
]);
599 vsr
[1] = env
->vsr
[i
];
600 reg
.addr
= (uintptr_t) &vsr
;
601 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
603 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
605 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
612 if (env
->insns_flags
& PPC_ALTIVEC
) {
613 reg
.id
= KVM_REG_PPC_VSCR
;
614 reg
.addr
= (uintptr_t)&env
->vscr
;
615 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
617 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
621 for (i
= 0; i
< 32; i
++) {
622 reg
.id
= KVM_REG_PPC_VR(i
);
623 reg
.addr
= (uintptr_t)&env
->avr
[i
];
624 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
626 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
635 static int kvm_get_fp(CPUState
*cs
)
637 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
638 CPUPPCState
*env
= &cpu
->env
;
639 struct kvm_one_reg reg
;
643 if (env
->insns_flags
& PPC_FLOAT
) {
645 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
647 reg
.id
= KVM_REG_PPC_FPSCR
;
648 reg
.addr
= (uintptr_t)&fpscr
;
649 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
651 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
657 for (i
= 0; i
< 32; i
++) {
660 reg
.addr
= (uintptr_t) &vsr
;
661 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
663 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
665 DPRINTF("Unable to get %s%d from KVM: %s\n",
666 vsx
? "VSR" : "FPR", i
, strerror(errno
));
669 env
->fpr
[i
] = vsr
[0];
671 env
->vsr
[i
] = vsr
[1];
677 if (env
->insns_flags
& PPC_ALTIVEC
) {
678 reg
.id
= KVM_REG_PPC_VSCR
;
679 reg
.addr
= (uintptr_t)&env
->vscr
;
680 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
682 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
686 for (i
= 0; i
< 32; i
++) {
687 reg
.id
= KVM_REG_PPC_VR(i
);
688 reg
.addr
= (uintptr_t)&env
->avr
[i
];
689 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
691 DPRINTF("Unable to get VR%d from KVM: %s\n",
701 #if defined(TARGET_PPC64)
702 static int kvm_get_vpa(CPUState
*cs
)
704 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
705 CPUPPCState
*env
= &cpu
->env
;
706 struct kvm_one_reg reg
;
709 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
710 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
711 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
713 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
717 assert((uintptr_t)&env
->slb_shadow_size
718 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
719 reg
.id
= KVM_REG_PPC_VPA_SLB
;
720 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
721 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
723 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
728 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
729 reg
.id
= KVM_REG_PPC_VPA_DTL
;
730 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
731 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
733 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
741 static int kvm_put_vpa(CPUState
*cs
)
743 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
744 CPUPPCState
*env
= &cpu
->env
;
745 struct kvm_one_reg reg
;
748 /* SLB shadow or DTL can't be registered unless a master VPA is
749 * registered. That means when restoring state, if a VPA *is*
750 * registered, we need to set that up first. If not, we need to
751 * deregister the others before deregistering the master VPA */
752 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
755 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
756 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
757 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
759 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
764 assert((uintptr_t)&env
->slb_shadow_size
765 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
766 reg
.id
= KVM_REG_PPC_VPA_SLB
;
767 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
768 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
770 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
774 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
775 reg
.id
= KVM_REG_PPC_VPA_DTL
;
776 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
777 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
779 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
784 if (!env
->vpa_addr
) {
785 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
786 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
787 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
789 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
796 #endif /* TARGET_PPC64 */
798 int kvm_arch_put_registers(CPUState
*cs
, int level
)
800 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
801 CPUPPCState
*env
= &cpu
->env
;
802 struct kvm_regs regs
;
806 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
813 regs
.xer
= cpu_read_xer(env
);
817 regs
.srr0
= env
->spr
[SPR_SRR0
];
818 regs
.srr1
= env
->spr
[SPR_SRR1
];
820 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
821 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
822 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
823 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
824 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
825 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
826 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
827 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
829 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
831 for (i
= 0;i
< 32; i
++)
832 regs
.gpr
[i
] = env
->gpr
[i
];
835 for (i
= 0; i
< 8; i
++) {
836 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
839 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
845 if (env
->tlb_dirty
) {
847 env
->tlb_dirty
= false;
850 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
851 struct kvm_sregs sregs
;
853 sregs
.pvr
= env
->spr
[SPR_PVR
];
855 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
859 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
860 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
861 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
862 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
864 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
869 for (i
= 0; i
< 16; i
++) {
870 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
874 for (i
= 0; i
< 8; i
++) {
875 /* Beware. We have to swap upper and lower bits here */
876 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
878 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
882 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
888 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
889 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
895 /* We deliberately ignore errors here: for kernels which have
896 * the ONE_REG calls but don't support the specific
897 * registers, there's a reasonable chance things will still
898 * work, at least until we try to migrate. */
899 for (i
= 0; i
< 1024; i
++) {
900 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
903 kvm_put_one_spr(cs
, id
, i
);
909 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
910 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
912 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
913 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
915 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
916 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
917 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
918 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
919 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
920 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
921 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
922 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
923 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
924 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
928 if (kvm_put_vpa(cs
) < 0) {
929 DPRINTF("Warning: Unable to set VPA information to KVM\n");
933 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
934 #endif /* TARGET_PPC64 */
940 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
942 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
945 int kvm_arch_get_registers(CPUState
*cs
)
947 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
948 CPUPPCState
*env
= &cpu
->env
;
949 struct kvm_regs regs
;
950 struct kvm_sregs sregs
;
954 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
959 for (i
= 7; i
>= 0; i
--) {
960 env
->crf
[i
] = cr
& 15;
966 cpu_write_xer(env
, regs
.xer
);
970 env
->spr
[SPR_SRR0
] = regs
.srr0
;
971 env
->spr
[SPR_SRR1
] = regs
.srr1
;
973 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
974 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
975 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
976 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
977 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
978 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
979 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
980 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
982 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
984 for (i
= 0;i
< 32; i
++)
985 env
->gpr
[i
] = regs
.gpr
[i
];
989 if (cap_booke_sregs
) {
990 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
995 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
996 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
997 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
998 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
999 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1000 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1001 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1002 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1003 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1004 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1005 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1006 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1009 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1010 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1011 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1012 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1013 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1014 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1017 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1018 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1021 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1022 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1025 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1026 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1027 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1028 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1029 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1030 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1031 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1032 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1033 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1034 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1035 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1036 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1037 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1038 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1039 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1040 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1041 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1042 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1043 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1044 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1045 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1046 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1047 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1048 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1049 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1050 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1051 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1052 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1053 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1054 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1055 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1056 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1057 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1059 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1060 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1061 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1062 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1063 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1064 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1065 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1068 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1069 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1070 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1073 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1074 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1075 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1076 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1077 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1081 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1082 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1083 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1084 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1085 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1086 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1087 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1088 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1089 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1090 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1091 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1094 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1095 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1098 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1099 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1100 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1103 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1104 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1105 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1106 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1108 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1109 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1110 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1116 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1121 if (!env
->external_htab
) {
1122 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1128 * The packed SLB array we get from KVM_GET_SREGS only contains
1129 * information about valid entries. So we flush our internal
1130 * copy to get rid of stale ones, then put all valid SLB entries
1133 memset(env
->slb
, 0, sizeof(env
->slb
));
1134 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1135 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1136 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1138 * Only restore valid entries
1140 if (rb
& SLB_ESID_V
) {
1141 ppc_store_slb(env
, rb
, rs
);
1147 for (i
= 0; i
< 16; i
++) {
1148 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1152 for (i
= 0; i
< 8; i
++) {
1153 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1154 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1155 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1156 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1161 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1167 /* We deliberately ignore errors here: for kernels which have
1168 * the ONE_REG calls but don't support the specific
1169 * registers, there's a reasonable chance things will still
1170 * work, at least until we try to migrate. */
1171 for (i
= 0; i
< 1024; i
++) {
1172 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1175 kvm_get_one_spr(cs
, id
, i
);
1181 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1182 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1184 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1185 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1187 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1188 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1189 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1190 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1191 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1192 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1193 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1194 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1195 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1196 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1200 if (kvm_get_vpa(cs
) < 0) {
1201 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1205 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1212 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1214 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1216 if (irq
!= PPC_INTERRUPT_EXT
) {
1220 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1224 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1229 #if defined(TARGET_PPCEMB)
1230 #define PPC_INPUT_INT PPC40x_INPUT_INT
1231 #elif defined(TARGET_PPC64)
1232 #define PPC_INPUT_INT PPC970_INPUT_INT
1234 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1237 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1239 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1240 CPUPPCState
*env
= &cpu
->env
;
1244 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1245 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1246 if (!cap_interrupt_level
&&
1247 run
->ready_for_interrupt_injection
&&
1248 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1249 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1251 /* For now KVM disregards the 'irq' argument. However, in the
1252 * future KVM could cache it in-kernel to avoid a heavyweight exit
1253 * when reading the UIC.
1255 irq
= KVM_INTERRUPT_SET
;
1257 DPRINTF("injected interrupt %d\n", irq
);
1258 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1260 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1263 /* Always wake up soon in case the interrupt was level based */
1264 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1265 (get_ticks_per_sec() / 50));
1268 /* We don't know if there are more interrupts pending after this. However,
1269 * the guest will return to userspace in the course of handling this one
1270 * anyways, so we will get a chance to deliver the rest. */
1273 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1277 int kvm_arch_process_async_events(CPUState
*cs
)
1282 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1284 CPUState
*cs
= CPU(cpu
);
1285 CPUPPCState
*env
= &cpu
->env
;
1287 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1289 cs
->exception_index
= EXCP_HLT
;
1295 /* map dcr access to existing qemu dcr emulation */
1296 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1298 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1299 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1304 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1306 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1307 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1312 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1314 /* Mixed endian case is not handled */
1315 uint32_t sc
= debug_inst_opcode
;
1317 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1319 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1326 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1330 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1331 sc
!= debug_inst_opcode
||
1332 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1340 static int find_hw_breakpoint(target_ulong addr
, int type
)
1344 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1345 <= ARRAY_SIZE(hw_debug_points
));
1347 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1348 if (hw_debug_points
[n
].addr
== addr
&&
1349 hw_debug_points
[n
].type
== type
) {
1357 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1361 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1363 *flag
= BP_MEM_ACCESS
;
1367 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1369 *flag
= BP_MEM_WRITE
;
1373 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1375 *flag
= BP_MEM_READ
;
1382 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1383 target_ulong len
, int type
)
1385 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1389 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1390 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1393 case GDB_BREAKPOINT_HW
:
1394 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1398 if (find_hw_breakpoint(addr
, type
) >= 0) {
1405 case GDB_WATCHPOINT_WRITE
:
1406 case GDB_WATCHPOINT_READ
:
1407 case GDB_WATCHPOINT_ACCESS
:
1408 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1412 if (find_hw_breakpoint(addr
, type
) >= 0) {
1426 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1427 target_ulong len
, int type
)
1431 n
= find_hw_breakpoint(addr
, type
);
1437 case GDB_BREAKPOINT_HW
:
1441 case GDB_WATCHPOINT_WRITE
:
1442 case GDB_WATCHPOINT_READ
:
1443 case GDB_WATCHPOINT_ACCESS
:
1450 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1455 void kvm_arch_remove_all_hw_breakpoints(void)
1457 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1460 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1464 /* Software Breakpoint updates */
1465 if (kvm_sw_breakpoints_active(cs
)) {
1466 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1469 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1470 <= ARRAY_SIZE(hw_debug_points
));
1471 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1473 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1474 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1475 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1476 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1477 switch (hw_debug_points
[n
].type
) {
1478 case GDB_BREAKPOINT_HW
:
1479 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1481 case GDB_WATCHPOINT_WRITE
:
1482 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1484 case GDB_WATCHPOINT_READ
:
1485 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1487 case GDB_WATCHPOINT_ACCESS
:
1488 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1489 KVMPPC_DEBUG_WATCH_READ
;
1492 cpu_abort(cs
, "Unsupported breakpoint type\n");
1494 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1499 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1501 CPUState
*cs
= CPU(cpu
);
1502 CPUPPCState
*env
= &cpu
->env
;
1503 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1508 if (cs
->singlestep_enabled
) {
1510 } else if (arch_info
->status
) {
1511 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1512 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1513 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1517 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1518 KVMPPC_DEBUG_WATCH_WRITE
)) {
1519 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1522 cs
->watchpoint_hit
= &hw_watchpoint
;
1523 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1524 hw_watchpoint
.flags
= flag
;
1528 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1531 /* QEMU is not able to handle debug exception, so inject
1532 * program exception to guest;
1533 * Yes program exception NOT debug exception !!
1534 * When QEMU is using debug resources then debug exception must
1535 * be always set. To achieve this we set MSR_DE and also set
1536 * MSRP_DEP so guest cannot change MSR_DE.
1537 * When emulating debug resource for guest we want guest
1538 * to control MSR_DE (enable/disable debug interrupt on need).
1539 * Supporting both configurations are NOT possible.
1540 * So the result is that we cannot share debug resources
1541 * between QEMU and Guest on BOOKE architecture.
1542 * In the current design QEMU gets the priority over guest,
1543 * this means that if QEMU is using debug resources then guest
1545 * For software breakpoint QEMU uses a privileged instruction;
1546 * So there cannot be any reason that we are here for guest
1547 * set debug exception, only possibility is guest executed a
1548 * privileged / illegal instruction and that's why we are
1549 * injecting a program interrupt.
1552 cpu_synchronize_state(cs
);
1553 /* env->nip is PC, so increment this by 4 to use
1554 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1557 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1558 env
->error_code
= POWERPC_EXCP_INVAL
;
1559 ppc_cpu_do_interrupt(cs
);
1565 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1567 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1568 CPUPPCState
*env
= &cpu
->env
;
1571 switch (run
->exit_reason
) {
1573 if (run
->dcr
.is_write
) {
1574 DPRINTF("handle dcr write\n");
1575 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1577 DPRINTF("handle dcr read\n");
1578 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1582 DPRINTF("handle halt\n");
1583 ret
= kvmppc_handle_halt(cpu
);
1585 #if defined(TARGET_PPC64)
1586 case KVM_EXIT_PAPR_HCALL
:
1587 DPRINTF("handle PAPR hypercall\n");
1588 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1590 run
->papr_hcall
.args
);
1595 DPRINTF("handle epr\n");
1596 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1599 case KVM_EXIT_WATCHDOG
:
1600 DPRINTF("handle watchdog expiry\n");
1601 watchdog_perform_action();
1605 case KVM_EXIT_DEBUG
:
1606 DPRINTF("handle debug exception\n");
1607 if (kvm_handle_debug(cpu
, run
)) {
1611 /* re-enter, this exception was guest-internal */
1616 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1624 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1626 CPUState
*cs
= CPU(cpu
);
1627 uint32_t bits
= tsr_bits
;
1628 struct kvm_one_reg reg
= {
1629 .id
= KVM_REG_PPC_OR_TSR
,
1630 .addr
= (uintptr_t) &bits
,
1633 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1636 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1639 CPUState
*cs
= CPU(cpu
);
1640 uint32_t bits
= tsr_bits
;
1641 struct kvm_one_reg reg
= {
1642 .id
= KVM_REG_PPC_CLEAR_TSR
,
1643 .addr
= (uintptr_t) &bits
,
1646 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1649 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1651 CPUState
*cs
= CPU(cpu
);
1652 CPUPPCState
*env
= &cpu
->env
;
1653 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1655 struct kvm_one_reg reg
= {
1656 .id
= KVM_REG_PPC_TCR
,
1657 .addr
= (uintptr_t) &tcr
,
1660 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1663 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1665 CPUState
*cs
= CPU(cpu
);
1668 if (!kvm_enabled()) {
1672 if (!cap_ppc_watchdog
) {
1673 printf("warning: KVM does not support watchdog");
1677 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1679 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1680 __func__
, strerror(-ret
));
/*
 * Find the first line of /proc/cpuinfo that starts with 'field' and copy
 * that whole line into value (at most len bytes, via pstrcpy()).
 * Returns 0 when a line was found and -1 otherwise, including when the
 * file cannot be opened.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    for (;;) {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        if (!*line) {
            break;
        }
    }

    fclose(f);

    return ret;
}
/*
 * Return the host timebase frequency parsed from the "timebase" line of
 * /proc/cpuinfo.  Falls back to get_ticks_per_sec() when that line is
 * missing or contains no ':' separator.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *sep;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return fallback;
    }

    sep = strchr(line, ':');
    if (!sep) {
        return fallback;
    }

    /* The number follows the colon. */
    return atoi(sep + 1);
}
/*
 * Load the contents of /proc/device-tree/system-id into a newly allocated
 * buffer stored in *value.  Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
/*
 * Load the contents of /proc/device-tree/model into a newly allocated
 * buffer stored in *value.  Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1746 /* Try to find a device tree node for a CPU with clock-frequency property */
1747 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1749 struct dirent
*dirp
;
1752 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1753 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1758 while ((dirp
= readdir(dp
)) != NULL
) {
1760 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1762 f
= fopen(buf
, "r");
1764 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1771 if (buf
[0] == '\0') {
1772 printf("Unknown host!\n");
1779 /* Read a CPU node property from the host device tree that's a single
1780 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1781 * (can't find or open the property, or doesn't understand the
1783 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1785 char buf
[PATH_MAX
], *tmp
;
1793 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1797 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1799 f
= fopen(tmp
, "rb");
1805 len
= fread(&u
, 1, sizeof(u
), f
);
1809 /* property is a 32-bit quantity */
1810 return be32_to_cpu(u
.v32
);
1812 return be64_to_cpu(u
.v64
);
/* Return the host CPU node's "clock-frequency" device tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Return the host CPU node's "ibm,vmx" property, truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
/* Return the host CPU node's "ibm,dfp" property, truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
1833 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1835 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1836 CPUState
*cs
= CPU(cpu
);
1838 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1839 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1846 int kvmppc_get_hasidle(CPUPPCState
*env
)
1848 struct kvm_ppc_pvinfo pvinfo
;
1850 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1851 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1858 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1860 uint32_t *hc
= (uint32_t*)buf
;
1861 struct kvm_ppc_pvinfo pvinfo
;
1863 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1864 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1869 * Fallback to always fail hypercalls regardless of endianness:
1871 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1873 * b .+8 (becomes nop in wrong endian)
1874 * bswap32(li r3, -1)
1877 hc
[0] = cpu_to_be32(0x08000048);
1878 hc
[1] = cpu_to_be32(0x3860ffff);
1879 hc
[2] = cpu_to_be32(0x48000008);
1880 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
1885 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1887 CPUState
*cs
= CPU(cpu
);
1890 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
1892 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1895 /* Update the capability flag so we sync the right information
1900 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
1902 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
1905 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1907 CPUState
*cs
= CPU(cpu
);
1910 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
1911 if (ret
&& mpic_proxy
) {
1912 cpu_abort(cs
, "This KVM version does not support EPR\n");
1916 int kvmppc_smt_threads(void)
1918 return cap_ppc_smt
? cap_ppc_smt
: 1;
1922 off_t
kvmppc_alloc_rma(void **rma
)
1926 struct kvm_allocate_rma ret
;
1928 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1929 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1930 * not necessary on this hardware
1931 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1933 * FIXME: We should allow the user to force contiguous RMA
1934 * allocation in the cap_ppc_rma==1 case.
1936 if (cap_ppc_rma
< 2) {
1940 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1942 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1947 size
= MIN(ret
.rma_size
, 256ul << 20);
1949 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1950 if (*rma
== MAP_FAILED
) {
1951 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1958 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1960 struct kvm_ppc_smmu_info info
;
1961 long rampagesize
, best_page_shift
;
1964 if (cap_ppc_rma
>= 2) {
1965 return current_size
;
1968 /* Find the largest hardware supported page size that's less than
1969 * or equal to the (logical) backing page size of guest RAM */
1970 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1971 rampagesize
= getrampagesize();
1972 best_page_shift
= 0;
1974 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1975 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
1977 if (!sps
->page_shift
) {
1981 if ((sps
->page_shift
> best_page_shift
)
1982 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1983 best_page_shift
= sps
->page_shift
;
1987 return MIN(current_size
,
1988 1ULL << (best_page_shift
+ hash_shift
- 7));
1992 bool kvmppc_spapr_use_multitce(void)
1994 return cap_spapr_multitce
;
1997 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2000 struct kvm_create_spapr_tce args
= {
2002 .window_size
= window_size
,
2008 /* Must set fd to -1 so we don't try to munmap when called for
2009 * destroying the table, which the upper layers -will- do
2012 if (!cap_spapr_tce
|| (vfio_accel
&& !cap_spapr_vfio
)) {
2016 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2018 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2023 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2024 /* FIXME: round this up to page size */
2026 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2027 if (table
== MAP_FAILED
) {
2028 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
/*
 * Unmap a TCE table of nb_table 64-bit entries and close its file
 * descriptor.
 *
 * Returns -1 when fd is negative (nothing to release) and 0 otherwise.
 * An munmap()/close() failure is only reported on stderr.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the line so it does not merge with later output. */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2057 int kvmppc_reset_htab(int shift_hint
)
2059 uint32_t shift
= shift_hint
;
2061 if (!kvm_enabled()) {
2062 /* Full emulation, tell caller to allocate htab itself */
2065 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2067 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2068 if (ret
== -ENOTTY
) {
2069 /* At least some versions of PR KVM advertise the
2070 * capability, but don't implement the ioctl(). Oops.
2071 * Return 0 so that we allocate the htab in qemu, as is
2072 * correct for PR. */
2074 } else if (ret
< 0) {
2080 /* We have a kernel that predates the htab reset calls. For PR
2081 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2082 * this era, it has allocated a 16MB fixed size hash table
2083 * already. Kernels of this era have the GET_PVINFO capability
2084 * only on PR, so we use this hack to determine the right
2086 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
2087 /* PR - tell caller to allocate htab */
2090 /* HV - assume 16MB kernel allocated htab */
2095 static inline uint32_t mfpvr(void)
/*
 * Set (on == true) or clear (on == false) the bits of 'flags' in *word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t updated = on ? (*word | flags) : (*word & ~flags);

    *word = updated;
}
2113 static void kvmppc_host_cpu_initfn(Object
*obj
)
2115 assert(kvm_enabled());
2118 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2120 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2121 uint32_t vmx
= kvmppc_get_vmx();
2122 uint32_t dfp
= kvmppc_get_dfp();
2123 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2124 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2126 /* Now fix up the class with information we can query from the host */
2130 /* Only override when we know what the host supports */
2131 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2132 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2135 /* Only override when we know what the host supports */
2136 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2139 if (dcache_size
!= -1) {
2140 pcc
->l1_dcache_size
= dcache_size
;
2143 if (icache_size
!= -1) {
2144 pcc
->l1_icache_size
= icache_size
;
2148 bool kvmppc_has_cap_epr(void)
2153 bool kvmppc_has_cap_htab_fd(void)
2158 bool kvmppc_has_cap_fixup_hcalls(void)
2160 return cap_fixup_hcalls
;
2163 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2165 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2167 while (oc
&& !object_class_is_abstract(oc
)) {
2168 oc
= object_class_get_parent(oc
);
2172 return POWERPC_CPU_CLASS(oc
);
2175 static int kvm_ppc_register_host_cpu_type(void)
2177 TypeInfo type_info
= {
2178 .name
= TYPE_HOST_POWERPC_CPU
,
2179 .instance_init
= kvmppc_host_cpu_initfn
,
2180 .class_init
= kvmppc_host_cpu_class_init
,
2182 uint32_t host_pvr
= mfpvr();
2183 PowerPCCPUClass
*pvr_pcc
;
2186 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2187 if (pvr_pcc
== NULL
) {
2188 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2190 if (pvr_pcc
== NULL
) {
2193 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2194 type_register(&type_info
);
2196 /* Register generic family CPU class for a family */
2197 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2198 dc
= DEVICE_CLASS(pvr_pcc
);
2199 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2200 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2201 type_register(&type_info
);
2206 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2208 struct kvm_rtas_token_args args
= {
2212 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2216 strncpy(args
.name
, function
, sizeof(args
.name
));
2218 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2221 int kvmppc_get_htab_fd(bool write
)
2223 struct kvm_get_htab_fd s
= {
2224 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2229 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2233 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2236 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2238 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2239 uint8_t buf
[bufsize
];
2243 rc
= read(fd
, buf
, bufsize
);
2245 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2249 uint8_t *buffer
= buf
;
2252 struct kvm_get_htab_header
*head
=
2253 (struct kvm_get_htab_header
*) buffer
;
2254 size_t chunksize
= sizeof(*head
) +
2255 HASH_PTE_SIZE_64
* head
->n_valid
;
2257 qemu_put_be32(f
, head
->index
);
2258 qemu_put_be16(f
, head
->n_valid
);
2259 qemu_put_be16(f
, head
->n_invalid
);
2260 qemu_put_buffer(f
, (void *)(head
+ 1),
2261 HASH_PTE_SIZE_64
* head
->n_valid
);
2263 buffer
+= chunksize
;
2269 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2271 return (rc
== 0) ? 1 : 0;
2274 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2275 uint16_t n_valid
, uint16_t n_invalid
)
2277 struct kvm_get_htab_header
*buf
;
2278 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2281 buf
= alloca(chunksize
);
2283 buf
->n_valid
= n_valid
;
2284 buf
->n_invalid
= n_invalid
;
2286 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2288 rc
= write(fd
, buf
, chunksize
);
2290 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2294 if (rc
!= chunksize
) {
2295 /* We should never get a short write on a single chunk */
2296 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2302 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2307 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2312 int kvm_arch_on_sigbus(int code
, void *addr
)
2317 void kvm_arch_init_irq_routing(KVMState
*s
)
2321 struct kvm_get_htab_buf
{
2322 struct kvm_get_htab_header header
;
2324 * We require one extra byte for read
2326 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2329 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2332 struct kvm_get_htab_fd ghf
;
2333 struct kvm_get_htab_buf
*hpte_buf
;
2336 ghf
.start_index
= pte_index
;
2337 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2342 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2344 * Read the hpte group
2346 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2351 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2360 void kvmppc_hash64_free_pteg(uint64_t token
)
2362 struct kvm_get_htab_buf
*htab_buf
;
2364 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2370 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2371 target_ulong pte0
, target_ulong pte1
)
2374 struct kvm_get_htab_fd ghf
;
2375 struct kvm_get_htab_buf hpte_buf
;
2378 ghf
.start_index
= 0; /* Ignored */
2379 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2384 hpte_buf
.header
.n_valid
= 1;
2385 hpte_buf
.header
.n_invalid
= 0;
2386 hpte_buf
.header
.index
= pte_index
;
2387 hpte_buf
.hpte
[0] = pte0
;
2388 hpte_buf
.hpte
[1] = pte1
;
2390 * Write the hpte entry.
2391 * CAUTION: write() has the warn_unused_result attribute. Hence we
2392 * need to check the return value, even though we do nothing.
2394 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2406 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2407 uint64_t address
, uint32_t data
)