/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
41 #include "exec/gdbstub.h"
42 #include "exec/memattrs.h"
43 #include "sysemu/hostmem.h"
/*
 * Debug tracing for this file.  Build with DEBUG_KVM defined to have
 * DPRINTF() print to stderr; otherwise it expands to an empty statement.
 */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

/* Directory holding the host's per-CPU device-tree nodes. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
57 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
/*
 * Cached results of the KVM capability probes.  All of them except cap_papr
 * are filled in by kvm_arch_init().
 */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;            /* assigned in kvm_arch_init() */
static int cap_one_reg;
static int cap_epr;             /* assigned in kvm_arch_init() */
static int cap_ppc_watchdog;
static int cap_papr;            /* not probed in kvm_arch_init(), see there */
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Opcode read from KVM via KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu(). */
static uint32_t debug_inst_opcode;
80 /* XXX We have a race condition where we actually have a level triggered
81 * interrupt, but the infrastructure can't expose that yet, so the guest
82 * takes but ignores it, goes to sleep and never gets notified that there's
83 * still an interrupt pending.
85 * As a quick workaround, let's just wake up again 20 ms after we injected
86 * an interrupt. That way we can assure that we're always reinjecting
87 * interrupts in case the guest swallowed them.
89 static QEMUTimer
*idle_timer
;
91 static void kvm_kick_cpu(void *opaque
)
93 PowerPCCPU
*cpu
= opaque
;
95 qemu_cpu_kick(CPU(cpu
));
98 static int kvm_ppc_register_host_cpu_type(void);
100 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
102 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
103 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
104 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
105 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
106 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
107 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
108 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
109 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
110 cap_spapr_vfio
= false;
111 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
112 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
113 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
114 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
115 /* Note: we don't set cap_papr here, because this capability is
116 * only activated after this by kvmppc_set_papr() */
117 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
118 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
120 if (!cap_interrupt_level
) {
121 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
122 "VM to stall at times!\n");
125 kvm_ppc_register_host_cpu_type();
130 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
132 CPUPPCState
*cenv
= &cpu
->env
;
133 CPUState
*cs
= CPU(cpu
);
134 struct kvm_sregs sregs
;
137 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
138 /* What we're really trying to say is "if we're on BookE, we use
139 the native PVR for now". This is the only sane way to check
140 it though, so we potentially confuse users that they can run
141 BookE guests on BookS. Let's hope nobody dares enough :) */
145 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
150 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
155 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
156 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
159 /* Set up a shared TLB array with KVM */
160 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
162 CPUPPCState
*env
= &cpu
->env
;
163 CPUState
*cs
= CPU(cpu
);
164 struct kvm_book3e_206_tlb_params params
= {};
165 struct kvm_config_tlb cfg
= {};
166 unsigned int entries
= 0;
169 if (!kvm_enabled() ||
170 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
174 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
176 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
177 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
178 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
179 entries
+= params
.tlb_sizes
[i
];
182 assert(entries
== env
->nb_tlb
);
183 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
185 env
->tlb_dirty
= true;
187 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
188 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
189 cfg
.params
= (uintptr_t)¶ms
;
190 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
192 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
194 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
195 __func__
, strerror(-ret
));
199 env
->kvm_sw_tlb
= true;
204 #if defined(TARGET_PPC64)
205 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
206 struct kvm_ppc_smmu_info
*info
)
208 CPUPPCState
*env
= &cpu
->env
;
209 CPUState
*cs
= CPU(cpu
);
211 memset(info
, 0, sizeof(*info
));
213 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
214 * need to "guess" what the supported page sizes are.
216 * For that to work we make a few assumptions:
218 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
219 * KVM which only supports 4K and 16M pages, but supports them
220 * regardless of the backing store characteritics. We also don't
221 * support 1T segments.
223 * This is safe as if HV KVM ever supports that capability or PR
224 * KVM grows supports for more page/segment sizes, those versions
225 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
226 * will not hit this fallback
228 * - Else we are running HV KVM. This means we only support page
229 * sizes that fit in the backing store. Additionally we only
230 * advertize 64K pages if the processor is ARCH 2.06 and we assume
231 * P7 encodings for the SLB and hash table. Here too, we assume
232 * support for any newer processor will mean a kernel that
233 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
236 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
241 /* Standard 4k base page size segment */
242 info
->sps
[0].page_shift
= 12;
243 info
->sps
[0].slb_enc
= 0;
244 info
->sps
[0].enc
[0].page_shift
= 12;
245 info
->sps
[0].enc
[0].pte_enc
= 0;
247 /* Standard 16M large page size segment */
248 info
->sps
[1].page_shift
= 24;
249 info
->sps
[1].slb_enc
= SLB_VSID_L
;
250 info
->sps
[1].enc
[0].page_shift
= 24;
251 info
->sps
[1].enc
[0].pte_enc
= 0;
255 /* HV KVM has backing store size restrictions */
256 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
258 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
259 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
262 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
268 /* Standard 4k base page size segment */
269 info
->sps
[i
].page_shift
= 12;
270 info
->sps
[i
].slb_enc
= 0;
271 info
->sps
[i
].enc
[0].page_shift
= 12;
272 info
->sps
[i
].enc
[0].pte_enc
= 0;
275 /* 64K on MMU 2.06 */
276 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
277 info
->sps
[i
].page_shift
= 16;
278 info
->sps
[i
].slb_enc
= 0x110;
279 info
->sps
[i
].enc
[0].page_shift
= 16;
280 info
->sps
[i
].enc
[0].pte_enc
= 1;
284 /* Standard 16M large page size segment */
285 info
->sps
[i
].page_shift
= 24;
286 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
287 info
->sps
[i
].enc
[0].page_shift
= 24;
288 info
->sps
[i
].enc
[0].pte_enc
= 0;
292 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
294 CPUState
*cs
= CPU(cpu
);
297 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
298 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
304 kvm_get_fallback_smmu_info(cpu
, info
);
/*
 * Return the page size backing @mem_path.
 *
 * statfs() is retried while it fails with EINTR; any other failure prints a
 * message and exits the process.  If the filesystem is hugetlbfs its block
 * size is returned, otherwise the normal page size.
 */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int rc;

    for (;;) {
        rc = statfs(mem_path, &fs);
        if (rc == 0 || errno != EINTR) {
            break;
        }
    }

    if (rc != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type == HUGETLBFS_MAGIC) {
        /* It's hugepage, return the huge page size */
        return fs.f_bsize;
    }

    /* Explicit mempath, but it's ordinary pages */
    return getpagesize();
}
333 static int find_max_supported_pagesize(Object
*obj
, void *opaque
)
336 long *hpsize_min
= opaque
;
338 if (object_dynamic_cast(obj
, TYPE_MEMORY_BACKEND
)) {
339 mem_path
= object_property_get_str(obj
, "mem-path", NULL
);
341 long hpsize
= gethugepagesize(mem_path
);
342 if (hpsize
< *hpsize_min
) {
343 *hpsize_min
= hpsize
;
346 *hpsize_min
= getpagesize();
353 static long getrampagesize(void)
355 long hpsize
= LONG_MAX
;
359 return gethugepagesize(mem_path
);
362 /* it's possible we have memory-backend objects with
363 * hugepage-backed RAM. these may get mapped into system
364 * address space via -numa parameters or memory hotplug
365 * hooks. we want to take these into account, but we
366 * also want to make sure these supported hugepage
367 * sizes are applicable across the entire range of memory
368 * we may boot from, so we take the min across all
369 * backends, and assume normal pages in cases where a
370 * backend isn't backed by hugepages.
372 memdev_root
= object_resolve_path("/objects", NULL
);
374 return getpagesize();
377 object_child_foreach(memdev_root
, find_max_supported_pagesize
, &hpsize
);
379 return (hpsize
== LONG_MAX
) ? getpagesize() : hpsize
;
382 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
384 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
388 return (1ul << shift
) <= rampgsize
;
391 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
393 static struct kvm_ppc_smmu_info smmu_info
;
394 static bool has_smmu_info
;
395 CPUPPCState
*env
= &cpu
->env
;
399 /* We only handle page sizes for 64-bit server guests for now */
400 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
404 /* Collect MMU info from kernel if not already */
405 if (!has_smmu_info
) {
406 kvm_get_smmu_info(cpu
, &smmu_info
);
407 has_smmu_info
= true;
410 rampagesize
= getrampagesize();
412 /* Convert to QEMU form */
413 memset(&env
->sps
, 0, sizeof(env
->sps
));
416 * XXX This loop should be an entry wide AND of the capabilities that
417 * the selected CPU has with the capabilities that KVM supports.
419 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
420 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
421 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
423 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
427 qsps
->page_shift
= ksps
->page_shift
;
428 qsps
->slb_enc
= ksps
->slb_enc
;
429 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
430 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
431 ksps
->enc
[jk
].page_shift
)) {
434 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
435 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
436 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
440 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
444 env
->slb_nr
= smmu_info
.slb_size
;
445 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
446 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
449 #else /* defined (TARGET_PPC64) */
451 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
455 #endif /* !defined (TARGET_PPC64) */
457 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
459 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
462 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
463 * book3s supports only 1 watchpoint, so array size
464 * of 4 is sufficient for now.
466 #define MAX_HW_BKPTS 4
468 static struct HWBreakpoint
{
471 } hw_debug_points
[MAX_HW_BKPTS
];
473 static CPUWatchpoint hw_watchpoint
;
475 /* Default there is no breakpoint and watchpoint supported */
476 static int max_hw_breakpoint
;
477 static int max_hw_watchpoint
;
478 static int nb_hw_breakpoint
;
479 static int nb_hw_watchpoint
;
481 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
483 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
484 max_hw_breakpoint
= 2;
485 max_hw_watchpoint
= 2;
488 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
489 fprintf(stderr
, "Error initializing h/w breakpoints\n");
494 int kvm_arch_init_vcpu(CPUState
*cs
)
496 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
497 CPUPPCState
*cenv
= &cpu
->env
;
500 /* Gather server mmu info from KVM and update the CPU state */
501 kvm_fixup_page_sizes(cpu
);
503 /* Synchronize sregs with kvm */
504 ret
= kvm_arch_sync_sregs(cpu
);
509 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
511 /* Some targets support access to KVM's guest TLB. */
512 switch (cenv
->mmu_model
) {
513 case POWERPC_MMU_BOOKE206
:
514 ret
= kvm_booke206_tlb_init(cpu
);
520 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
521 kvmppc_hw_debug_points_init(cenv
);
526 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
528 CPUPPCState
*env
= &cpu
->env
;
529 CPUState
*cs
= CPU(cpu
);
530 struct kvm_dirty_tlb dirty_tlb
;
531 unsigned char *bitmap
;
534 if (!env
->kvm_sw_tlb
) {
538 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
539 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
541 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
542 dirty_tlb
.num_dirty
= env
->nb_tlb
;
544 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
546 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
547 __func__
, strerror(-ret
));
553 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
555 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
556 CPUPPCState
*env
= &cpu
->env
;
561 struct kvm_one_reg reg
= {
563 .addr
= (uintptr_t) &val
,
567 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
569 trace_kvm_failed_spr_get(spr
, strerror(errno
));
571 switch (id
& KVM_REG_SIZE_MASK
) {
572 case KVM_REG_SIZE_U32
:
573 env
->spr
[spr
] = val
.u32
;
576 case KVM_REG_SIZE_U64
:
577 env
->spr
[spr
] = val
.u64
;
581 /* Don't handle this size yet */
587 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
589 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
590 CPUPPCState
*env
= &cpu
->env
;
595 struct kvm_one_reg reg
= {
597 .addr
= (uintptr_t) &val
,
601 switch (id
& KVM_REG_SIZE_MASK
) {
602 case KVM_REG_SIZE_U32
:
603 val
.u32
= env
->spr
[spr
];
606 case KVM_REG_SIZE_U64
:
607 val
.u64
= env
->spr
[spr
];
611 /* Don't handle this size yet */
615 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
617 trace_kvm_failed_spr_set(spr
, strerror(errno
));
621 static int kvm_put_fp(CPUState
*cs
)
623 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
624 CPUPPCState
*env
= &cpu
->env
;
625 struct kvm_one_reg reg
;
629 if (env
->insns_flags
& PPC_FLOAT
) {
630 uint64_t fpscr
= env
->fpscr
;
631 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
633 reg
.id
= KVM_REG_PPC_FPSCR
;
634 reg
.addr
= (uintptr_t)&fpscr
;
635 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
637 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
641 for (i
= 0; i
< 32; i
++) {
644 vsr
[0] = float64_val(env
->fpr
[i
]);
645 vsr
[1] = env
->vsr
[i
];
646 reg
.addr
= (uintptr_t) &vsr
;
647 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
649 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
651 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
658 if (env
->insns_flags
& PPC_ALTIVEC
) {
659 reg
.id
= KVM_REG_PPC_VSCR
;
660 reg
.addr
= (uintptr_t)&env
->vscr
;
661 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
663 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
667 for (i
= 0; i
< 32; i
++) {
668 reg
.id
= KVM_REG_PPC_VR(i
);
669 reg
.addr
= (uintptr_t)&env
->avr
[i
];
670 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
672 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
681 static int kvm_get_fp(CPUState
*cs
)
683 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
684 CPUPPCState
*env
= &cpu
->env
;
685 struct kvm_one_reg reg
;
689 if (env
->insns_flags
& PPC_FLOAT
) {
691 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
693 reg
.id
= KVM_REG_PPC_FPSCR
;
694 reg
.addr
= (uintptr_t)&fpscr
;
695 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
697 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
703 for (i
= 0; i
< 32; i
++) {
706 reg
.addr
= (uintptr_t) &vsr
;
707 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
709 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
711 DPRINTF("Unable to get %s%d from KVM: %s\n",
712 vsx
? "VSR" : "FPR", i
, strerror(errno
));
715 env
->fpr
[i
] = vsr
[0];
717 env
->vsr
[i
] = vsr
[1];
723 if (env
->insns_flags
& PPC_ALTIVEC
) {
724 reg
.id
= KVM_REG_PPC_VSCR
;
725 reg
.addr
= (uintptr_t)&env
->vscr
;
726 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
728 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
732 for (i
= 0; i
< 32; i
++) {
733 reg
.id
= KVM_REG_PPC_VR(i
);
734 reg
.addr
= (uintptr_t)&env
->avr
[i
];
735 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
737 DPRINTF("Unable to get VR%d from KVM: %s\n",
747 #if defined(TARGET_PPC64)
748 static int kvm_get_vpa(CPUState
*cs
)
750 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
751 CPUPPCState
*env
= &cpu
->env
;
752 struct kvm_one_reg reg
;
755 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
756 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
757 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
759 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
763 assert((uintptr_t)&env
->slb_shadow_size
764 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
765 reg
.id
= KVM_REG_PPC_VPA_SLB
;
766 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
767 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
769 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
774 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
775 reg
.id
= KVM_REG_PPC_VPA_DTL
;
776 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
777 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
779 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
787 static int kvm_put_vpa(CPUState
*cs
)
789 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
790 CPUPPCState
*env
= &cpu
->env
;
791 struct kvm_one_reg reg
;
794 /* SLB shadow or DTL can't be registered unless a master VPA is
795 * registered. That means when restoring state, if a VPA *is*
796 * registered, we need to set that up first. If not, we need to
797 * deregister the others before deregistering the master VPA */
798 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
801 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
802 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
803 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
805 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
810 assert((uintptr_t)&env
->slb_shadow_size
811 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
812 reg
.id
= KVM_REG_PPC_VPA_SLB
;
813 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
814 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
816 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
820 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
821 reg
.id
= KVM_REG_PPC_VPA_DTL
;
822 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
823 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
825 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
830 if (!env
->vpa_addr
) {
831 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
832 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
833 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
835 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
842 #endif /* TARGET_PPC64 */
844 int kvm_arch_put_registers(CPUState
*cs
, int level
)
846 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
847 CPUPPCState
*env
= &cpu
->env
;
848 struct kvm_regs regs
;
852 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
859 regs
.xer
= cpu_read_xer(env
);
863 regs
.srr0
= env
->spr
[SPR_SRR0
];
864 regs
.srr1
= env
->spr
[SPR_SRR1
];
866 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
867 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
868 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
869 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
870 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
871 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
872 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
873 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
875 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
877 for (i
= 0;i
< 32; i
++)
878 regs
.gpr
[i
] = env
->gpr
[i
];
881 for (i
= 0; i
< 8; i
++) {
882 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
885 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
891 if (env
->tlb_dirty
) {
893 env
->tlb_dirty
= false;
896 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
897 struct kvm_sregs sregs
;
899 sregs
.pvr
= env
->spr
[SPR_PVR
];
901 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
905 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
906 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
907 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
908 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
910 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
915 for (i
= 0; i
< 16; i
++) {
916 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
920 for (i
= 0; i
< 8; i
++) {
921 /* Beware. We have to swap upper and lower bits here */
922 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
924 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
928 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
934 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
935 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
941 /* We deliberately ignore errors here, for kernels which have
942 * the ONE_REG calls, but don't support the specific
943 * registers, there's a reasonable chance things will still
944 * work, at least until we try to migrate. */
945 for (i
= 0; i
< 1024; i
++) {
946 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
949 kvm_put_one_spr(cs
, id
, i
);
955 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
956 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
958 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
959 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
961 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
962 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
963 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
964 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
965 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
966 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
967 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
968 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
969 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
970 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
974 if (kvm_put_vpa(cs
) < 0) {
975 DPRINTF("Warning: Unable to set VPA information to KVM\n");
979 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
980 #endif /* TARGET_PPC64 */
986 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
988 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
991 int kvm_arch_get_registers(CPUState
*cs
)
993 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
994 CPUPPCState
*env
= &cpu
->env
;
995 struct kvm_regs regs
;
996 struct kvm_sregs sregs
;
1000 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1005 for (i
= 7; i
>= 0; i
--) {
1006 env
->crf
[i
] = cr
& 15;
1010 env
->ctr
= regs
.ctr
;
1012 cpu_write_xer(env
, regs
.xer
);
1013 env
->msr
= regs
.msr
;
1016 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1017 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1019 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1020 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1021 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1022 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1023 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1024 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1025 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1026 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1028 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1030 for (i
= 0;i
< 32; i
++)
1031 env
->gpr
[i
] = regs
.gpr
[i
];
1035 if (cap_booke_sregs
) {
1036 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1041 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1042 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1043 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1044 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1045 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1046 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1047 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1048 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1049 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1050 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1051 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1052 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1055 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1056 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1057 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1058 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1059 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1060 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1063 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1064 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1067 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1068 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1071 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1072 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1073 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1074 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1075 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1076 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1077 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1078 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1079 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1080 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1081 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1082 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1083 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1084 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1085 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1086 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1087 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1088 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1089 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1090 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1091 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1092 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1093 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1094 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1095 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1096 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1097 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1098 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1099 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1100 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1101 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1102 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1103 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1105 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1106 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1107 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1108 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1109 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1110 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1111 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1114 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1115 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1116 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1119 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1120 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1121 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1122 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1123 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1127 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1128 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1129 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1130 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1131 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1132 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1133 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1134 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1135 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1136 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1137 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1140 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1141 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1144 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1145 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1146 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1149 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1150 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1151 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1152 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1154 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1155 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1156 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1162 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1167 if (!env
->external_htab
) {
1168 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1174 * The packed SLB array we get from KVM_GET_SREGS only contains
1175 * information about valid entries. So we flush our internal
1176 * copy to get rid of stale ones, then put all valid SLB entries
1179 memset(env
->slb
, 0, sizeof(env
->slb
));
1180 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1181 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1182 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1184 * Only restore valid entries
1186 if (rb
& SLB_ESID_V
) {
1187 ppc_store_slb(env
, rb
, rs
);
1193 for (i
= 0; i
< 16; i
++) {
1194 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1198 for (i
= 0; i
< 8; i
++) {
1199 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1200 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1201 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1202 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1207 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1213 /* We deliberately ignore errors here, for kernels which have
1214 * the ONE_REG calls, but don't support the specific
1215 * registers, there's a reasonable chance things will still
1216 * work, at least until we try to migrate. */
1217 for (i
= 0; i
< 1024; i
++) {
1218 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1221 kvm_get_one_spr(cs
, id
, i
);
1227 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1228 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1230 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1231 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1233 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1234 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1235 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1236 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1237 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1238 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1239 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1240 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1241 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1242 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1246 if (kvm_get_vpa(cs
) < 0) {
1247 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1251 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1258 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1260 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1262 if (irq
!= PPC_INTERRUPT_EXT
) {
1266 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1270 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1275 #if defined(TARGET_PPCEMB)
1276 #define PPC_INPUT_INT PPC40x_INPUT_INT
1277 #elif defined(TARGET_PPC64)
1278 #define PPC_INPUT_INT PPC970_INPUT_INT
1280 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1283 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1285 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1286 CPUPPCState
*env
= &cpu
->env
;
1290 qemu_mutex_lock_iothread();
1292 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1293 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1294 if (!cap_interrupt_level
&&
1295 run
->ready_for_interrupt_injection
&&
1296 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1297 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1299 /* For now KVM disregards the 'irq' argument. However, in the
1300 * future KVM could cache it in-kernel to avoid a heavyweight exit
1301 * when reading the UIC.
1303 irq
= KVM_INTERRUPT_SET
;
1305 DPRINTF("injected interrupt %d\n", irq
);
1306 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1308 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1311 /* Always wake up soon in case the interrupt was level based */
1312 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1313 (get_ticks_per_sec() / 50));
1316 /* We don't know if there are more interrupts pending after this. However,
1317 * the guest will return to userspace in the course of handling this one
1318 * anyways, so we will get a chance to deliver the rest. */
1320 qemu_mutex_unlock_iothread();
1323 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1325 return MEMTXATTRS_UNSPECIFIED
;
1328 int kvm_arch_process_async_events(CPUState
*cs
)
1333 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1335 CPUState
*cs
= CPU(cpu
);
1336 CPUPPCState
*env
= &cpu
->env
;
1338 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1340 cs
->exception_index
= EXCP_HLT
;
1346 /* map dcr access to existing qemu dcr emulation */
1347 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1349 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1350 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1355 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1357 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1358 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1363 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1365 /* Mixed endian case is not handled */
1366 uint32_t sc
= debug_inst_opcode
;
1368 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1370 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1377 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1381 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1382 sc
!= debug_inst_opcode
||
1383 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1391 static int find_hw_breakpoint(target_ulong addr
, int type
)
1395 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1396 <= ARRAY_SIZE(hw_debug_points
));
1398 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1399 if (hw_debug_points
[n
].addr
== addr
&&
1400 hw_debug_points
[n
].type
== type
) {
1408 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1412 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1414 *flag
= BP_MEM_ACCESS
;
1418 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1420 *flag
= BP_MEM_WRITE
;
1424 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1426 *flag
= BP_MEM_READ
;
1433 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1434 target_ulong len
, int type
)
1436 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1440 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1441 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1444 case GDB_BREAKPOINT_HW
:
1445 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1449 if (find_hw_breakpoint(addr
, type
) >= 0) {
1456 case GDB_WATCHPOINT_WRITE
:
1457 case GDB_WATCHPOINT_READ
:
1458 case GDB_WATCHPOINT_ACCESS
:
1459 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1463 if (find_hw_breakpoint(addr
, type
) >= 0) {
1477 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1478 target_ulong len
, int type
)
1482 n
= find_hw_breakpoint(addr
, type
);
1488 case GDB_BREAKPOINT_HW
:
1492 case GDB_WATCHPOINT_WRITE
:
1493 case GDB_WATCHPOINT_READ
:
1494 case GDB_WATCHPOINT_ACCESS
:
1501 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1506 void kvm_arch_remove_all_hw_breakpoints(void)
1508 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1511 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1515 /* Software Breakpoint updates */
1516 if (kvm_sw_breakpoints_active(cs
)) {
1517 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1520 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1521 <= ARRAY_SIZE(hw_debug_points
));
1522 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1524 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1525 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1526 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1527 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1528 switch (hw_debug_points
[n
].type
) {
1529 case GDB_BREAKPOINT_HW
:
1530 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1532 case GDB_WATCHPOINT_WRITE
:
1533 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1535 case GDB_WATCHPOINT_READ
:
1536 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1538 case GDB_WATCHPOINT_ACCESS
:
1539 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1540 KVMPPC_DEBUG_WATCH_READ
;
1543 cpu_abort(cs
, "Unsupported breakpoint type\n");
1545 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1550 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1552 CPUState
*cs
= CPU(cpu
);
1553 CPUPPCState
*env
= &cpu
->env
;
1554 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1559 if (cs
->singlestep_enabled
) {
1561 } else if (arch_info
->status
) {
1562 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1563 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1564 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1568 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1569 KVMPPC_DEBUG_WATCH_WRITE
)) {
1570 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1573 cs
->watchpoint_hit
= &hw_watchpoint
;
1574 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1575 hw_watchpoint
.flags
= flag
;
1579 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1582 /* QEMU is not able to handle debug exception, so inject
1583 * program exception to guest;
1584 * Yes program exception NOT debug exception !!
1585 * When QEMU is using debug resources then debug exception must
1586 * be always set. To achieve this we set MSR_DE and also set
1587 * MSRP_DEP so guest cannot change MSR_DE.
1588 * When emulating debug resource for guest we want guest
1589 * to control MSR_DE (enable/disable debug interrupt on need).
1590 * Supporting both configurations are NOT possible.
1591 * So the result is that we cannot share debug resources
1592 * between QEMU and Guest on BOOKE architecture.
1593 * In the current design QEMU gets the priority over guest,
1594 * this means that if QEMU is using debug resources then guest
1596 * For software breakpoint QEMU uses a privileged instruction;
1597 * So there cannot be any reason that we are here for guest
1598 * set debug exception, only possibility is guest executed a
1599 * privileged / illegal instruction and that's why we are
1600 * injecting a program interrupt.
1603 cpu_synchronize_state(cs
);
1604 /* env->nip is PC, so increment this by 4 to use
1605 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1608 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1609 env
->error_code
= POWERPC_EXCP_INVAL
;
1610 ppc_cpu_do_interrupt(cs
);
1616 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1618 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1619 CPUPPCState
*env
= &cpu
->env
;
1622 qemu_mutex_lock_iothread();
1624 switch (run
->exit_reason
) {
1626 if (run
->dcr
.is_write
) {
1627 DPRINTF("handle dcr write\n");
1628 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1630 DPRINTF("handle dcr read\n");
1631 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1635 DPRINTF("handle halt\n");
1636 ret
= kvmppc_handle_halt(cpu
);
1638 #if defined(TARGET_PPC64)
1639 case KVM_EXIT_PAPR_HCALL
:
1640 DPRINTF("handle PAPR hypercall\n");
1641 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1643 run
->papr_hcall
.args
);
1648 DPRINTF("handle epr\n");
1649 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1652 case KVM_EXIT_WATCHDOG
:
1653 DPRINTF("handle watchdog expiry\n");
1654 watchdog_perform_action();
1658 case KVM_EXIT_DEBUG
:
1659 DPRINTF("handle debug exception\n");
1660 if (kvm_handle_debug(cpu
, run
)) {
1664 /* re-enter, this exception was guest-internal */
1669 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1674 qemu_mutex_unlock_iothread();
1678 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1680 CPUState
*cs
= CPU(cpu
);
1681 uint32_t bits
= tsr_bits
;
1682 struct kvm_one_reg reg
= {
1683 .id
= KVM_REG_PPC_OR_TSR
,
1684 .addr
= (uintptr_t) &bits
,
1687 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1690 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1693 CPUState
*cs
= CPU(cpu
);
1694 uint32_t bits
= tsr_bits
;
1695 struct kvm_one_reg reg
= {
1696 .id
= KVM_REG_PPC_CLEAR_TSR
,
1697 .addr
= (uintptr_t) &bits
,
1700 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1703 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1705 CPUState
*cs
= CPU(cpu
);
1706 CPUPPCState
*env
= &cpu
->env
;
1707 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1709 struct kvm_one_reg reg
= {
1710 .id
= KVM_REG_PPC_TCR
,
1711 .addr
= (uintptr_t) &tcr
,
1714 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1717 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1719 CPUState
*cs
= CPU(cpu
);
1722 if (!kvm_enabled()) {
1726 if (!cap_ppc_watchdog
) {
1727 printf("warning: KVM does not support watchdog");
1731 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1733 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1734 __func__
, strerror(-ret
));
1741 static int read_cpuinfo(const char *field
, char *value
, int len
)
1745 int field_len
= strlen(field
);
1748 f
= fopen("/proc/cpuinfo", "r");
1754 if (!fgets(line
, sizeof(line
), f
)) {
1757 if (!strncmp(line
, field
, field_len
)) {
1758 pstrcpy(value
, len
, line
);
1769 uint32_t kvmppc_get_tbfreq(void)
1773 uint32_t retval
= get_ticks_per_sec();
1775 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1779 if (!(ns
= strchr(line
, ':'))) {
/*
 * Load the contents of /proc/device-tree/system-id into *@value using
 * g_file_get_contents(); neither the length nor a GError is requested.
 *
 * Returns the success flag reported by g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    const char *path = "/proc/device-tree/system-id";
    bool ok = g_file_get_contents(path, value, NULL, NULL);

    return ok;
}
/*
 * Load the contents of /proc/device-tree/model into *@value using
 * g_file_get_contents(); neither the length nor a GError is requested.
 *
 * Returns the success flag reported by g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    const char *path = "/proc/device-tree/model";
    bool ok = g_file_get_contents(path, value, NULL, NULL);

    return ok;
}
1799 /* Try to find a device tree node for a CPU with clock-frequency property */
1800 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1802 struct dirent
*dirp
;
1805 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1806 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1811 while ((dirp
= readdir(dp
)) != NULL
) {
1813 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1815 f
= fopen(buf
, "r");
1817 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1824 if (buf
[0] == '\0') {
1825 printf("Unknown host!\n");
1832 /* Read a CPU node property from the host device tree that's a single
1833 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1834 * (can't find or open the property, or doesn't understand the
1836 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1838 char buf
[PATH_MAX
], *tmp
;
1846 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1850 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1852 f
= fopen(tmp
, "rb");
1858 len
= fread(&u
, 1, sizeof(u
), f
);
1862 /* property is a 32-bit quantity */
1863 return be32_to_cpu(u
.v32
);
1865 return be64_to_cpu(u
.v64
);
/*
 * Return the "clock-frequency" property of the host CPU device-tree node,
 * exactly as produced by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/*
 * Return the "ibm,vmx" property of the host CPU device-tree node.  The
 * 64-bit result of kvmppc_read_int_cpu_dt() is narrowed to 32 bits.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return (uint32_t)prop;
}
/*
 * Return the "ibm,dfp" property of the host CPU device-tree node.  The
 * 64-bit result of kvmppc_read_int_cpu_dt() is narrowed to 32 bits.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return (uint32_t)prop;
}
1886 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1888 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1889 CPUState
*cs
= CPU(cpu
);
1891 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1892 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1899 int kvmppc_get_hasidle(CPUPPCState
*env
)
1901 struct kvm_ppc_pvinfo pvinfo
;
1903 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1904 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1911 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1913 uint32_t *hc
= (uint32_t*)buf
;
1914 struct kvm_ppc_pvinfo pvinfo
;
1916 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1917 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1922 * Fallback to always fail hypercalls regardless of endianness:
1924 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1926 * b .+8 (becomes nop in wrong endian)
1927 * bswap32(li r3, -1)
1930 hc
[0] = cpu_to_be32(0x08000048);
1931 hc
[1] = cpu_to_be32(0x3860ffff);
1932 hc
[2] = cpu_to_be32(0x48000008);
1933 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
1938 static inline int kvmppc_enable_hcall(KVMState
*s
, target_ulong hcall
)
1940 return kvm_vm_enable_cap(s
, KVM_CAP_PPC_ENABLE_HCALL
, 0, hcall
, 1);
1943 void kvmppc_enable_logical_ci_hcalls(void)
1946 * FIXME: it would be nice if we could detect the cases where
1947 * we're using a device which requires the in kernel
1948 * implementation of these hcalls, but the kernel lacks them and
1949 * produce a warning.
1951 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
1952 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
1955 void kvmppc_enable_set_mode_hcall(void)
1957 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
1960 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1962 CPUState
*cs
= CPU(cpu
);
1965 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
1967 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1970 /* Update the capability flag so we sync the right information
1975 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
1977 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
1980 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1982 CPUState
*cs
= CPU(cpu
);
1985 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
1986 if (ret
&& mpic_proxy
) {
1987 cpu_abort(cs
, "This KVM version does not support EPR\n");
1991 int kvmppc_smt_threads(void)
1993 return cap_ppc_smt
? cap_ppc_smt
: 1;
1997 off_t
kvmppc_alloc_rma(void **rma
)
2001 struct kvm_allocate_rma ret
;
2003 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2004 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2005 * not necessary on this hardware
2006 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2008 * FIXME: We should allow the user to force contiguous RMA
2009 * allocation in the cap_ppc_rma==1 case.
2011 if (cap_ppc_rma
< 2) {
2015 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2017 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2022 size
= MIN(ret
.rma_size
, 256ul << 20);
2024 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2025 if (*rma
== MAP_FAILED
) {
2026 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2033 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2035 struct kvm_ppc_smmu_info info
;
2036 long rampagesize
, best_page_shift
;
2039 if (cap_ppc_rma
>= 2) {
2040 return current_size
;
2043 /* Find the largest hardware supported page size that's less than
2044 * or equal to the (logical) backing page size of guest RAM */
2045 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2046 rampagesize
= getrampagesize();
2047 best_page_shift
= 0;
2049 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2050 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2052 if (!sps
->page_shift
) {
2056 if ((sps
->page_shift
> best_page_shift
)
2057 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2058 best_page_shift
= sps
->page_shift
;
2062 return MIN(current_size
,
2063 1ULL << (best_page_shift
+ hash_shift
- 7));
2067 bool kvmppc_spapr_use_multitce(void)
2069 return cap_spapr_multitce
;
2072 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2075 struct kvm_create_spapr_tce args
= {
2077 .window_size
= window_size
,
2083 /* Must set fd to -1 so we don't try to munmap when called for
2084 * destroying the table, which the upper layers -will- do
2087 if (!cap_spapr_tce
|| (vfio_accel
&& !cap_spapr_vfio
)) {
2091 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2093 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2098 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2099 /* FIXME: round this up to page size */
2101 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2102 if (table
== MAP_FAILED
) {
2103 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2113 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2121 len
= nb_table
* sizeof(uint64_t);
2122 if ((munmap(table
, len
) < 0) ||
2124 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2126 /* Leak the table */
2132 int kvmppc_reset_htab(int shift_hint
)
2134 uint32_t shift
= shift_hint
;
2136 if (!kvm_enabled()) {
2137 /* Full emulation, tell caller to allocate htab itself */
2140 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2142 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2143 if (ret
== -ENOTTY
) {
2144 /* At least some versions of PR KVM advertise the
2145 * capability, but don't implement the ioctl(). Oops.
2146 * Return 0 so that we allocate the htab in qemu, as is
2147 * correct for PR. */
2149 } else if (ret
< 0) {
2155 /* We have a kernel that predates the htab reset calls. For PR
2156 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2157 * this era, it has allocated a 16MB fixed size hash table
2158 * already. Kernels of this era have the GET_PVINFO capability
2159 * only on PR, so we use this hack to determine the right
2161 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
2162 /* PR - tell caller to allocate htab */
2165 /* HV - assume 16MB kernel allocated htab */
2170 static inline uint32_t mfpvr(void)
2179 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2188 static void kvmppc_host_cpu_initfn(Object
*obj
)
2190 assert(kvm_enabled());
2193 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2195 DeviceClass
*dc
= DEVICE_CLASS(oc
);
2196 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2197 uint32_t vmx
= kvmppc_get_vmx();
2198 uint32_t dfp
= kvmppc_get_dfp();
2199 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2200 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2202 /* Now fix up the class with information we can query from the host */
2206 /* Only override when we know what the host supports */
2207 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2208 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2211 /* Only override when we know what the host supports */
2212 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2215 if (dcache_size
!= -1) {
2216 pcc
->l1_dcache_size
= dcache_size
;
2219 if (icache_size
!= -1) {
2220 pcc
->l1_icache_size
= icache_size
;
2223 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2224 dc
->cannot_destroy_with_object_finalize_yet
= true;
2227 bool kvmppc_has_cap_epr(void)
2232 bool kvmppc_has_cap_htab_fd(void)
2237 bool kvmppc_has_cap_fixup_hcalls(void)
2239 return cap_fixup_hcalls
;
2242 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2244 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2246 while (oc
&& !object_class_is_abstract(oc
)) {
2247 oc
= object_class_get_parent(oc
);
2251 return POWERPC_CPU_CLASS(oc
);
2254 static int kvm_ppc_register_host_cpu_type(void)
2256 TypeInfo type_info
= {
2257 .name
= TYPE_HOST_POWERPC_CPU
,
2258 .instance_init
= kvmppc_host_cpu_initfn
,
2259 .class_init
= kvmppc_host_cpu_class_init
,
2261 uint32_t host_pvr
= mfpvr();
2262 PowerPCCPUClass
*pvr_pcc
;
2265 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2266 if (pvr_pcc
== NULL
) {
2267 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2269 if (pvr_pcc
== NULL
) {
2272 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2273 type_register(&type_info
);
2275 /* Register generic family CPU class for a family */
2276 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2277 dc
= DEVICE_CLASS(pvr_pcc
);
2278 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2279 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2280 type_register(&type_info
);
2285 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2287 struct kvm_rtas_token_args args
= {
2291 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2295 strncpy(args
.name
, function
, sizeof(args
.name
));
2297 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2300 int kvmppc_get_htab_fd(bool write
)
2302 struct kvm_get_htab_fd s
= {
2303 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2308 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2312 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2315 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2317 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2318 uint8_t buf
[bufsize
];
2322 rc
= read(fd
, buf
, bufsize
);
2324 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2328 uint8_t *buffer
= buf
;
2331 struct kvm_get_htab_header
*head
=
2332 (struct kvm_get_htab_header
*) buffer
;
2333 size_t chunksize
= sizeof(*head
) +
2334 HASH_PTE_SIZE_64
* head
->n_valid
;
2336 qemu_put_be32(f
, head
->index
);
2337 qemu_put_be16(f
, head
->n_valid
);
2338 qemu_put_be16(f
, head
->n_invalid
);
2339 qemu_put_buffer(f
, (void *)(head
+ 1),
2340 HASH_PTE_SIZE_64
* head
->n_valid
);
2342 buffer
+= chunksize
;
2348 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2350 return (rc
== 0) ? 1 : 0;
2353 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2354 uint16_t n_valid
, uint16_t n_invalid
)
2356 struct kvm_get_htab_header
*buf
;
2357 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2360 buf
= alloca(chunksize
);
2362 buf
->n_valid
= n_valid
;
2363 buf
->n_invalid
= n_invalid
;
2365 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2367 rc
= write(fd
, buf
, chunksize
);
2369 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2373 if (rc
!= chunksize
) {
2374 /* We should never get a short write on a single chunk */
2375 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2381 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2386 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2391 int kvm_arch_on_sigbus(int code
, void *addr
)
2396 void kvm_arch_init_irq_routing(KVMState
*s
)
2400 struct kvm_get_htab_buf
{
2401 struct kvm_get_htab_header header
;
2403 * We require one extra byte for read
2405 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2408 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2411 struct kvm_get_htab_fd ghf
;
2412 struct kvm_get_htab_buf
*hpte_buf
;
2415 ghf
.start_index
= pte_index
;
2416 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2421 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2423 * Read the hpte group
2425 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2430 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2439 void kvmppc_hash64_free_pteg(uint64_t token
)
2441 struct kvm_get_htab_buf
*htab_buf
;
2443 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2449 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2450 target_ulong pte0
, target_ulong pte1
)
2453 struct kvm_get_htab_fd ghf
;
2454 struct kvm_get_htab_buf hpte_buf
;
2457 ghf
.start_index
= 0; /* Ignored */
2458 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2463 hpte_buf
.header
.n_valid
= 1;
2464 hpte_buf
.header
.n_invalid
= 0;
2465 hpte_buf
.header
.index
= pte_index
;
2466 hpte_buf
.hpte
[0] = pte0
;
2467 hpte_buf
.hpte
[1] = pte1
;
2469 * Write the hpte entry.
2470 * CAUTION: write() has the warn_unused_result attribute. Hence we
2471 * need to check the return value, even though we do nothing.
2473 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2485 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2486 uint64_t address
, uint32_t data
)
/*
 * Map an MSI data word to its GSI number: the low 16 bits of @data.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t gsi_mask = 0xffff;

    return data & gsi_mask;
}
2496 int kvmppc_enable_hwrng(void)
2498 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2502 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);