/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
45 #include "qemu/cutils.h"
46 #if defined(TARGET_PPC64)
47 #include "hw/ppc/spapr_cpu_core.h"
/* Debug printf helper: the visible text defined DPRINTF twice with no
 * conditional guard (a redefinition error); restore the conventional
 * DEBUG_KVM gate so the second definition is the compiled-out variant. */
/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
62 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
66 static int cap_interrupt_unset
= false;
67 static int cap_interrupt_level
= false;
68 static int cap_segstate
;
69 static int cap_booke_sregs
;
70 static int cap_ppc_smt
;
71 static int cap_ppc_rma
;
72 static int cap_spapr_tce
;
73 static int cap_spapr_multitce
;
74 static int cap_spapr_vfio
;
76 static int cap_one_reg
;
78 static int cap_ppc_watchdog
;
80 static int cap_htab_fd
;
81 static int cap_fixup_hcalls
;
83 static uint32_t debug_inst_opcode
;
85 /* XXX We have a race condition where we actually have a level triggered
86 * interrupt, but the infrastructure can't expose that yet, so the guest
87 * takes but ignores it, goes to sleep and never gets notified that there's
88 * still an interrupt pending.
90 * As a quick workaround, let's just wake up again 20 ms after we injected
91 * an interrupt. That way we can assure that we're always reinjecting
92 * interrupts in case the guest swallowed them.
94 static QEMUTimer
*idle_timer
;
96 static void kvm_kick_cpu(void *opaque
)
98 PowerPCCPU
*cpu
= opaque
;
100 qemu_cpu_kick(CPU(cpu
));
103 static int kvm_ppc_register_host_cpu_type(void);
105 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
107 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
108 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
109 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
110 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
111 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
112 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
113 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
114 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
115 cap_spapr_vfio
= false;
116 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
117 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
118 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
119 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
120 /* Note: we don't set cap_papr here, because this capability is
121 * only activated after this by kvmppc_set_papr() */
122 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
123 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
125 if (!cap_interrupt_level
) {
126 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
127 "VM to stall at times!\n");
130 kvm_ppc_register_host_cpu_type();
135 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
137 CPUPPCState
*cenv
= &cpu
->env
;
138 CPUState
*cs
= CPU(cpu
);
139 struct kvm_sregs sregs
;
142 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
143 /* What we're really trying to say is "if we're on BookE, we use
144 the native PVR for now". This is the only sane way to check
145 it though, so we potentially confuse users that they can run
146 BookE guests on BookS. Let's hope nobody dares enough :) */
150 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
155 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
160 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
161 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
164 /* Set up a shared TLB array with KVM */
165 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
167 CPUPPCState
*env
= &cpu
->env
;
168 CPUState
*cs
= CPU(cpu
);
169 struct kvm_book3e_206_tlb_params params
= {};
170 struct kvm_config_tlb cfg
= {};
171 unsigned int entries
= 0;
174 if (!kvm_enabled() ||
175 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
179 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
181 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
182 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
183 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
184 entries
+= params
.tlb_sizes
[i
];
187 assert(entries
== env
->nb_tlb
);
188 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
190 env
->tlb_dirty
= true;
192 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
193 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
194 cfg
.params
= (uintptr_t)¶ms
;
195 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
197 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
199 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
200 __func__
, strerror(-ret
));
204 env
->kvm_sw_tlb
= true;
209 #if defined(TARGET_PPC64)
210 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
211 struct kvm_ppc_smmu_info
*info
)
213 CPUPPCState
*env
= &cpu
->env
;
214 CPUState
*cs
= CPU(cpu
);
216 memset(info
, 0, sizeof(*info
));
218 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
219 * need to "guess" what the supported page sizes are.
221 * For that to work we make a few assumptions:
223 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
224 * KVM which only supports 4K and 16M pages, but supports them
225 * regardless of the backing store characteritics. We also don't
226 * support 1T segments.
228 * This is safe as if HV KVM ever supports that capability or PR
229 * KVM grows supports for more page/segment sizes, those versions
230 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
231 * will not hit this fallback
233 * - Else we are running HV KVM. This means we only support page
234 * sizes that fit in the backing store. Additionally we only
235 * advertize 64K pages if the processor is ARCH 2.06 and we assume
236 * P7 encodings for the SLB and hash table. Here too, we assume
237 * support for any newer processor will mean a kernel that
238 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
241 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
246 /* Standard 4k base page size segment */
247 info
->sps
[0].page_shift
= 12;
248 info
->sps
[0].slb_enc
= 0;
249 info
->sps
[0].enc
[0].page_shift
= 12;
250 info
->sps
[0].enc
[0].pte_enc
= 0;
252 /* Standard 16M large page size segment */
253 info
->sps
[1].page_shift
= 24;
254 info
->sps
[1].slb_enc
= SLB_VSID_L
;
255 info
->sps
[1].enc
[0].page_shift
= 24;
256 info
->sps
[1].enc
[0].pte_enc
= 0;
260 /* HV KVM has backing store size restrictions */
261 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
263 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
264 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
267 if (env
->mmu_model
== POWERPC_MMU_2_06
||
268 env
->mmu_model
== POWERPC_MMU_2_07
) {
274 /* Standard 4k base page size segment */
275 info
->sps
[i
].page_shift
= 12;
276 info
->sps
[i
].slb_enc
= 0;
277 info
->sps
[i
].enc
[0].page_shift
= 12;
278 info
->sps
[i
].enc
[0].pte_enc
= 0;
281 /* 64K on MMU 2.06 and later */
282 if (env
->mmu_model
== POWERPC_MMU_2_06
||
283 env
->mmu_model
== POWERPC_MMU_2_07
) {
284 info
->sps
[i
].page_shift
= 16;
285 info
->sps
[i
].slb_enc
= 0x110;
286 info
->sps
[i
].enc
[0].page_shift
= 16;
287 info
->sps
[i
].enc
[0].pte_enc
= 1;
291 /* Standard 16M large page size segment */
292 info
->sps
[i
].page_shift
= 24;
293 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
294 info
->sps
[i
].enc
[0].page_shift
= 24;
295 info
->sps
[i
].enc
[0].pte_enc
= 0;
299 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
301 CPUState
*cs
= CPU(cpu
);
304 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
305 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
311 kvm_get_fallback_smmu_info(cpu
, info
);
/*
 * Determine the page size backing mem_path: the filesystem block size
 * if it lives on hugetlbfs, otherwise the normal host page size.
 * Exits the process if the path cannot be statfs()ed at all.
 */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    /* statfs can be interrupted by a signal; retry on EINTR. */
    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
341 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
342 * may or may not name the same files / on the same filesystem now as
343 * when we actually open and map them. Iterate over the file
344 * descriptors instead, and use qemu_fd_getpagesize().
346 static int find_max_supported_pagesize(Object
*obj
, void *opaque
)
349 long *hpsize_min
= opaque
;
351 if (object_dynamic_cast(obj
, TYPE_MEMORY_BACKEND
)) {
352 mem_path
= object_property_get_str(obj
, "mem-path", NULL
);
354 long hpsize
= gethugepagesize(mem_path
);
355 if (hpsize
< *hpsize_min
) {
356 *hpsize_min
= hpsize
;
359 *hpsize_min
= getpagesize();
366 static long getrampagesize(void)
368 long hpsize
= LONG_MAX
;
369 long mainrampagesize
;
373 mainrampagesize
= gethugepagesize(mem_path
);
375 mainrampagesize
= getpagesize();
378 /* it's possible we have memory-backend objects with
379 * hugepage-backed RAM. these may get mapped into system
380 * address space via -numa parameters or memory hotplug
381 * hooks. we want to take these into account, but we
382 * also want to make sure these supported hugepage
383 * sizes are applicable across the entire range of memory
384 * we may boot from, so we take the min across all
385 * backends, and assume normal pages in cases where a
386 * backend isn't backed by hugepages.
388 memdev_root
= object_resolve_path("/objects", NULL
);
390 object_child_foreach(memdev_root
, find_max_supported_pagesize
, &hpsize
);
392 if (hpsize
== LONG_MAX
) {
393 /* No additional memory regions found ==> Report main RAM page size */
394 return mainrampagesize
;
397 /* If NUMA is disabled or the NUMA nodes are not backed with a
398 * memory-backend, then there is at least one node using "normal" RAM,
399 * so if its page size is smaller we have got to report that size instead.
401 if (hpsize
> mainrampagesize
&&
402 (nb_numa_nodes
== 0 || numa_info
[0].node_memdev
== NULL
)) {
405 error_report("Huge page support disabled (n/a for main memory).");
408 return mainrampagesize
;
414 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
416 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
420 return (1ul << shift
) <= rampgsize
;
423 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
425 static struct kvm_ppc_smmu_info smmu_info
;
426 static bool has_smmu_info
;
427 CPUPPCState
*env
= &cpu
->env
;
431 /* We only handle page sizes for 64-bit server guests for now */
432 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
436 /* Collect MMU info from kernel if not already */
437 if (!has_smmu_info
) {
438 kvm_get_smmu_info(cpu
, &smmu_info
);
439 has_smmu_info
= true;
442 rampagesize
= getrampagesize();
444 /* Convert to QEMU form */
445 memset(&env
->sps
, 0, sizeof(env
->sps
));
447 /* If we have HV KVM, we need to forbid CI large pages if our
448 * host page size is smaller than 64K.
450 if (smmu_info
.flags
& KVM_PPC_PAGE_SIZES_REAL
) {
451 env
->ci_large_pages
= getpagesize() >= 0x10000;
455 * XXX This loop should be an entry wide AND of the capabilities that
456 * the selected CPU has with the capabilities that KVM supports.
458 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
459 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
460 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
462 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
466 qsps
->page_shift
= ksps
->page_shift
;
467 qsps
->slb_enc
= ksps
->slb_enc
;
468 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
469 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
470 ksps
->enc
[jk
].page_shift
)) {
473 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
474 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
475 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
479 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
483 env
->slb_nr
= smmu_info
.slb_size
;
484 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
485 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
488 #else /* defined (TARGET_PPC64) */
490 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
494 #endif /* !defined (TARGET_PPC64) */
496 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
498 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
501 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
502 * book3s supports only 1 watchpoint, so array size
503 * of 4 is sufficient for now.
505 #define MAX_HW_BKPTS 4
507 static struct HWBreakpoint
{
510 } hw_debug_points
[MAX_HW_BKPTS
];
512 static CPUWatchpoint hw_watchpoint
;
514 /* Default there is no breakpoint and watchpoint supported */
515 static int max_hw_breakpoint
;
516 static int max_hw_watchpoint
;
517 static int nb_hw_breakpoint
;
518 static int nb_hw_watchpoint
;
520 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
522 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
523 max_hw_breakpoint
= 2;
524 max_hw_watchpoint
= 2;
527 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
528 fprintf(stderr
, "Error initializing h/w breakpoints\n");
533 int kvm_arch_init_vcpu(CPUState
*cs
)
535 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
536 CPUPPCState
*cenv
= &cpu
->env
;
539 /* Gather server mmu info from KVM and update the CPU state */
540 kvm_fixup_page_sizes(cpu
);
542 /* Synchronize sregs with kvm */
543 ret
= kvm_arch_sync_sregs(cpu
);
545 if (ret
== -EINVAL
) {
546 error_report("Register sync failed... If you're using kvm-hv.ko,"
547 " only \"-cpu host\" is possible");
552 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
554 /* Some targets support access to KVM's guest TLB. */
555 switch (cenv
->mmu_model
) {
556 case POWERPC_MMU_BOOKE206
:
557 ret
= kvm_booke206_tlb_init(cpu
);
563 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
564 kvmppc_hw_debug_points_init(cenv
);
569 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
571 CPUPPCState
*env
= &cpu
->env
;
572 CPUState
*cs
= CPU(cpu
);
573 struct kvm_dirty_tlb dirty_tlb
;
574 unsigned char *bitmap
;
577 if (!env
->kvm_sw_tlb
) {
581 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
582 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
584 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
585 dirty_tlb
.num_dirty
= env
->nb_tlb
;
587 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
589 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
590 __func__
, strerror(-ret
));
596 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
598 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
599 CPUPPCState
*env
= &cpu
->env
;
604 struct kvm_one_reg reg
= {
606 .addr
= (uintptr_t) &val
,
610 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
612 trace_kvm_failed_spr_get(spr
, strerror(errno
));
614 switch (id
& KVM_REG_SIZE_MASK
) {
615 case KVM_REG_SIZE_U32
:
616 env
->spr
[spr
] = val
.u32
;
619 case KVM_REG_SIZE_U64
:
620 env
->spr
[spr
] = val
.u64
;
624 /* Don't handle this size yet */
630 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
632 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
633 CPUPPCState
*env
= &cpu
->env
;
638 struct kvm_one_reg reg
= {
640 .addr
= (uintptr_t) &val
,
644 switch (id
& KVM_REG_SIZE_MASK
) {
645 case KVM_REG_SIZE_U32
:
646 val
.u32
= env
->spr
[spr
];
649 case KVM_REG_SIZE_U64
:
650 val
.u64
= env
->spr
[spr
];
654 /* Don't handle this size yet */
658 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
660 trace_kvm_failed_spr_set(spr
, strerror(errno
));
664 static int kvm_put_fp(CPUState
*cs
)
666 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
667 CPUPPCState
*env
= &cpu
->env
;
668 struct kvm_one_reg reg
;
672 if (env
->insns_flags
& PPC_FLOAT
) {
673 uint64_t fpscr
= env
->fpscr
;
674 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
676 reg
.id
= KVM_REG_PPC_FPSCR
;
677 reg
.addr
= (uintptr_t)&fpscr
;
678 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
680 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
684 for (i
= 0; i
< 32; i
++) {
687 #ifdef HOST_WORDS_BIGENDIAN
688 vsr
[0] = float64_val(env
->fpr
[i
]);
689 vsr
[1] = env
->vsr
[i
];
691 vsr
[0] = env
->vsr
[i
];
692 vsr
[1] = float64_val(env
->fpr
[i
]);
694 reg
.addr
= (uintptr_t) &vsr
;
695 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
697 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
699 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
706 if (env
->insns_flags
& PPC_ALTIVEC
) {
707 reg
.id
= KVM_REG_PPC_VSCR
;
708 reg
.addr
= (uintptr_t)&env
->vscr
;
709 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
711 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
715 for (i
= 0; i
< 32; i
++) {
716 reg
.id
= KVM_REG_PPC_VR(i
);
717 reg
.addr
= (uintptr_t)&env
->avr
[i
];
718 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
720 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
729 static int kvm_get_fp(CPUState
*cs
)
731 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
732 CPUPPCState
*env
= &cpu
->env
;
733 struct kvm_one_reg reg
;
737 if (env
->insns_flags
& PPC_FLOAT
) {
739 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
741 reg
.id
= KVM_REG_PPC_FPSCR
;
742 reg
.addr
= (uintptr_t)&fpscr
;
743 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
745 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
751 for (i
= 0; i
< 32; i
++) {
754 reg
.addr
= (uintptr_t) &vsr
;
755 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
757 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
759 DPRINTF("Unable to get %s%d from KVM: %s\n",
760 vsx
? "VSR" : "FPR", i
, strerror(errno
));
763 #ifdef HOST_WORDS_BIGENDIAN
764 env
->fpr
[i
] = vsr
[0];
766 env
->vsr
[i
] = vsr
[1];
769 env
->fpr
[i
] = vsr
[1];
771 env
->vsr
[i
] = vsr
[0];
778 if (env
->insns_flags
& PPC_ALTIVEC
) {
779 reg
.id
= KVM_REG_PPC_VSCR
;
780 reg
.addr
= (uintptr_t)&env
->vscr
;
781 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
783 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
787 for (i
= 0; i
< 32; i
++) {
788 reg
.id
= KVM_REG_PPC_VR(i
);
789 reg
.addr
= (uintptr_t)&env
->avr
[i
];
790 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
792 DPRINTF("Unable to get VR%d from KVM: %s\n",
802 #if defined(TARGET_PPC64)
803 static int kvm_get_vpa(CPUState
*cs
)
805 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
806 CPUPPCState
*env
= &cpu
->env
;
807 struct kvm_one_reg reg
;
810 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
811 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
812 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
814 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
818 assert((uintptr_t)&env
->slb_shadow_size
819 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
820 reg
.id
= KVM_REG_PPC_VPA_SLB
;
821 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
822 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
824 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
829 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
830 reg
.id
= KVM_REG_PPC_VPA_DTL
;
831 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
832 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
834 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
842 static int kvm_put_vpa(CPUState
*cs
)
844 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
845 CPUPPCState
*env
= &cpu
->env
;
846 struct kvm_one_reg reg
;
849 /* SLB shadow or DTL can't be registered unless a master VPA is
850 * registered. That means when restoring state, if a VPA *is*
851 * registered, we need to set that up first. If not, we need to
852 * deregister the others before deregistering the master VPA */
853 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
856 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
857 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
858 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
860 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
865 assert((uintptr_t)&env
->slb_shadow_size
866 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
867 reg
.id
= KVM_REG_PPC_VPA_SLB
;
868 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
869 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
871 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
875 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
876 reg
.id
= KVM_REG_PPC_VPA_DTL
;
877 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
878 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
880 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
885 if (!env
->vpa_addr
) {
886 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
887 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
888 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
890 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
897 #endif /* TARGET_PPC64 */
899 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
901 CPUPPCState
*env
= &cpu
->env
;
902 struct kvm_sregs sregs
;
905 sregs
.pvr
= env
->spr
[SPR_PVR
];
907 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
911 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
912 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
913 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
914 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
916 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
921 for (i
= 0; i
< 16; i
++) {
922 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
926 for (i
= 0; i
< 8; i
++) {
927 /* Beware. We have to swap upper and lower bits here */
928 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
930 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
934 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
937 int kvm_arch_put_registers(CPUState
*cs
, int level
)
939 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
940 CPUPPCState
*env
= &cpu
->env
;
941 struct kvm_regs regs
;
945 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
952 regs
.xer
= cpu_read_xer(env
);
956 regs
.srr0
= env
->spr
[SPR_SRR0
];
957 regs
.srr1
= env
->spr
[SPR_SRR1
];
959 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
960 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
961 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
962 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
963 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
964 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
965 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
966 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
968 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
970 for (i
= 0;i
< 32; i
++)
971 regs
.gpr
[i
] = env
->gpr
[i
];
974 for (i
= 0; i
< 8; i
++) {
975 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
978 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
984 if (env
->tlb_dirty
) {
986 env
->tlb_dirty
= false;
989 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
990 ret
= kvmppc_put_books_sregs(cpu
);
996 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
997 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1003 /* We deliberately ignore errors here, for kernels which have
1004 * the ONE_REG calls, but don't support the specific
1005 * registers, there's a reasonable chance things will still
1006 * work, at least until we try to migrate. */
1007 for (i
= 0; i
< 1024; i
++) {
1008 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1011 kvm_put_one_spr(cs
, id
, i
);
1017 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1018 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1020 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1021 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1023 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1024 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1025 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1026 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1027 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1028 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1029 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1030 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1031 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1032 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1036 if (kvm_put_vpa(cs
) < 0) {
1037 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1041 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1042 #endif /* TARGET_PPC64 */
1048 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1050 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1053 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1055 CPUPPCState
*env
= &cpu
->env
;
1056 struct kvm_sregs sregs
;
1059 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1064 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1065 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1066 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1067 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1068 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1069 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1070 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1071 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1072 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1073 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1074 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1075 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1078 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1079 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1080 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1081 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1082 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1083 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1086 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1087 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1090 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1091 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1094 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1095 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1096 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1097 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1098 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1099 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1100 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1101 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1102 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1103 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1104 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1105 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1106 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1107 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1108 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1109 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1110 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1111 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1112 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1113 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1114 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1115 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1116 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1117 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1118 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1119 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1120 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1121 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1122 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1123 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1124 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1125 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1126 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1128 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1129 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1130 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1131 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1132 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1133 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1134 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1137 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1138 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1139 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1142 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1143 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1144 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1145 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1146 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1150 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1151 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1152 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1153 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1154 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1155 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1156 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1157 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1158 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1159 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1160 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1163 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1164 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1167 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1168 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1169 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1172 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1173 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1174 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1175 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1177 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1178 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1179 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1186 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1188 CPUPPCState
*env
= &cpu
->env
;
1189 struct kvm_sregs sregs
;
1193 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1198 if (!env
->external_htab
) {
1199 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1205 * The packed SLB array we get from KVM_GET_SREGS only contains
1206 * information about valid entries. So we flush our internal copy
1207 * to get rid of stale ones, then put all valid SLB entries back
1210 memset(env
->slb
, 0, sizeof(env
->slb
));
1211 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1212 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1213 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1215 * Only restore valid entries
1217 if (rb
& SLB_ESID_V
) {
1218 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1224 for (i
= 0; i
< 16; i
++) {
1225 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1229 for (i
= 0; i
< 8; i
++) {
1230 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1231 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1232 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1233 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1239 int kvm_arch_get_registers(CPUState
*cs
)
1241 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1242 CPUPPCState
*env
= &cpu
->env
;
1243 struct kvm_regs regs
;
1247 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1252 for (i
= 7; i
>= 0; i
--) {
1253 env
->crf
[i
] = cr
& 15;
1257 env
->ctr
= regs
.ctr
;
1259 cpu_write_xer(env
, regs
.xer
);
1260 env
->msr
= regs
.msr
;
1263 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1264 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1266 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1267 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1268 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1269 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1270 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1271 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1272 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1273 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1275 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1277 for (i
= 0;i
< 32; i
++)
1278 env
->gpr
[i
] = regs
.gpr
[i
];
1282 if (cap_booke_sregs
) {
1283 ret
= kvmppc_get_booke_sregs(cpu
);
1290 ret
= kvmppc_get_books_sregs(cpu
);
1297 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1303 /* We deliberately ignore errors here, for kernels which have
1304 * the ONE_REG calls, but don't support the specific
1305 * registers, there's a reasonable chance things will still
1306 * work, at least until we try to migrate. */
1307 for (i
= 0; i
< 1024; i
++) {
1308 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1311 kvm_get_one_spr(cs
, id
, i
);
1317 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1318 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1320 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1321 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1323 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1324 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1325 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1326 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1327 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1328 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1329 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1330 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1331 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1332 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1336 if (kvm_get_vpa(cs
) < 0) {
1337 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1341 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1348 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1350 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1352 if (irq
!= PPC_INTERRUPT_EXT
) {
1356 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1360 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1365 #if defined(TARGET_PPCEMB)
1366 #define PPC_INPUT_INT PPC40x_INPUT_INT
1367 #elif defined(TARGET_PPC64)
1368 #define PPC_INPUT_INT PPC970_INPUT_INT
1370 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1373 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1375 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1376 CPUPPCState
*env
= &cpu
->env
;
1380 qemu_mutex_lock_iothread();
1382 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1383 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1384 if (!cap_interrupt_level
&&
1385 run
->ready_for_interrupt_injection
&&
1386 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1387 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1389 /* For now KVM disregards the 'irq' argument. However, in the
1390 * future KVM could cache it in-kernel to avoid a heavyweight exit
1391 * when reading the UIC.
1393 irq
= KVM_INTERRUPT_SET
;
1395 DPRINTF("injected interrupt %d\n", irq
);
1396 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1398 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1401 /* Always wake up soon in case the interrupt was level based */
1402 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1403 (NANOSECONDS_PER_SECOND
/ 50));
1406 /* We don't know if there are more interrupts pending after this. However,
1407 * the guest will return to userspace in the course of handling this one
1408 * anyways, so we will get a chance to deliver the rest. */
1410 qemu_mutex_unlock_iothread();
1413 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1415 return MEMTXATTRS_UNSPECIFIED
;
1418 int kvm_arch_process_async_events(CPUState
*cs
)
1423 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1425 CPUState
*cs
= CPU(cpu
);
1426 CPUPPCState
*env
= &cpu
->env
;
1428 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1430 cs
->exception_index
= EXCP_HLT
;
1436 /* map dcr access to existing qemu dcr emulation */
1437 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1439 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1440 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1445 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1447 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1448 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1453 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1455 /* Mixed endian case is not handled */
1456 uint32_t sc
= debug_inst_opcode
;
1458 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1460 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1467 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1471 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1472 sc
!= debug_inst_opcode
||
1473 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1481 static int find_hw_breakpoint(target_ulong addr
, int type
)
1485 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1486 <= ARRAY_SIZE(hw_debug_points
));
1488 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1489 if (hw_debug_points
[n
].addr
== addr
&&
1490 hw_debug_points
[n
].type
== type
) {
1498 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1502 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1504 *flag
= BP_MEM_ACCESS
;
1508 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1510 *flag
= BP_MEM_WRITE
;
1514 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1516 *flag
= BP_MEM_READ
;
1523 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1524 target_ulong len
, int type
)
1526 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1530 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1531 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1534 case GDB_BREAKPOINT_HW
:
1535 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1539 if (find_hw_breakpoint(addr
, type
) >= 0) {
1546 case GDB_WATCHPOINT_WRITE
:
1547 case GDB_WATCHPOINT_READ
:
1548 case GDB_WATCHPOINT_ACCESS
:
1549 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1553 if (find_hw_breakpoint(addr
, type
) >= 0) {
1567 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1568 target_ulong len
, int type
)
1572 n
= find_hw_breakpoint(addr
, type
);
1578 case GDB_BREAKPOINT_HW
:
1582 case GDB_WATCHPOINT_WRITE
:
1583 case GDB_WATCHPOINT_READ
:
1584 case GDB_WATCHPOINT_ACCESS
:
1591 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1596 void kvm_arch_remove_all_hw_breakpoints(void)
1598 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1601 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1605 /* Software Breakpoint updates */
1606 if (kvm_sw_breakpoints_active(cs
)) {
1607 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1610 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1611 <= ARRAY_SIZE(hw_debug_points
));
1612 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1614 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1615 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1616 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1617 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1618 switch (hw_debug_points
[n
].type
) {
1619 case GDB_BREAKPOINT_HW
:
1620 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1622 case GDB_WATCHPOINT_WRITE
:
1623 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1625 case GDB_WATCHPOINT_READ
:
1626 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1628 case GDB_WATCHPOINT_ACCESS
:
1629 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1630 KVMPPC_DEBUG_WATCH_READ
;
1633 cpu_abort(cs
, "Unsupported breakpoint type\n");
1635 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1640 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1642 CPUState
*cs
= CPU(cpu
);
1643 CPUPPCState
*env
= &cpu
->env
;
1644 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1649 if (cs
->singlestep_enabled
) {
1651 } else if (arch_info
->status
) {
1652 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1653 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1654 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1658 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1659 KVMPPC_DEBUG_WATCH_WRITE
)) {
1660 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1663 cs
->watchpoint_hit
= &hw_watchpoint
;
1664 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1665 hw_watchpoint
.flags
= flag
;
1669 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1672 /* QEMU is not able to handle debug exception, so inject
1673 * program exception to guest;
1674 * Yes program exception NOT debug exception !!
1675 * When QEMU is using debug resources then debug exception must
1676 * be always set. To achieve this we set MSR_DE and also set
1677 * MSRP_DEP so guest cannot change MSR_DE.
1678 * When emulating debug resource for guest we want guest
1679 * to control MSR_DE (enable/disable debug interrupt on need).
1680 * Supporting both configurations are NOT possible.
1681 * So the result is that we cannot share debug resources
1682 * between QEMU and Guest on BOOKE architecture.
1683 * In the current design QEMU gets the priority over guest,
1684 * this means that if QEMU is using debug resources then guest
1686 * For software breakpoint QEMU uses a privileged instruction;
1687 * So there cannot be any reason that we are here for guest
1688 * set debug exception, only possibility is guest executed a
1689 * privileged / illegal instruction and that's why we are
1690 * injecting a program interrupt.
1693 cpu_synchronize_state(cs
);
1694 /* env->nip is PC, so increment this by 4 to use
1695 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1698 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1699 env
->error_code
= POWERPC_EXCP_INVAL
;
1700 ppc_cpu_do_interrupt(cs
);
1706 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1708 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1709 CPUPPCState
*env
= &cpu
->env
;
1712 qemu_mutex_lock_iothread();
1714 switch (run
->exit_reason
) {
1716 if (run
->dcr
.is_write
) {
1717 DPRINTF("handle dcr write\n");
1718 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1720 DPRINTF("handle dcr read\n");
1721 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1725 DPRINTF("handle halt\n");
1726 ret
= kvmppc_handle_halt(cpu
);
1728 #if defined(TARGET_PPC64)
1729 case KVM_EXIT_PAPR_HCALL
:
1730 DPRINTF("handle PAPR hypercall\n");
1731 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1733 run
->papr_hcall
.args
);
1738 DPRINTF("handle epr\n");
1739 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1742 case KVM_EXIT_WATCHDOG
:
1743 DPRINTF("handle watchdog expiry\n");
1744 watchdog_perform_action();
1748 case KVM_EXIT_DEBUG
:
1749 DPRINTF("handle debug exception\n");
1750 if (kvm_handle_debug(cpu
, run
)) {
1754 /* re-enter, this exception was guest-internal */
1759 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1764 qemu_mutex_unlock_iothread();
1768 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1770 CPUState
*cs
= CPU(cpu
);
1771 uint32_t bits
= tsr_bits
;
1772 struct kvm_one_reg reg
= {
1773 .id
= KVM_REG_PPC_OR_TSR
,
1774 .addr
= (uintptr_t) &bits
,
1777 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1780 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1783 CPUState
*cs
= CPU(cpu
);
1784 uint32_t bits
= tsr_bits
;
1785 struct kvm_one_reg reg
= {
1786 .id
= KVM_REG_PPC_CLEAR_TSR
,
1787 .addr
= (uintptr_t) &bits
,
1790 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1793 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1795 CPUState
*cs
= CPU(cpu
);
1796 CPUPPCState
*env
= &cpu
->env
;
1797 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1799 struct kvm_one_reg reg
= {
1800 .id
= KVM_REG_PPC_TCR
,
1801 .addr
= (uintptr_t) &tcr
,
1804 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1807 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1809 CPUState
*cs
= CPU(cpu
);
1812 if (!kvm_enabled()) {
1816 if (!cap_ppc_watchdog
) {
1817 printf("warning: KVM does not support watchdog");
1821 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1823 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1824 __func__
, strerror(-ret
));
1831 static int read_cpuinfo(const char *field
, char *value
, int len
)
1835 int field_len
= strlen(field
);
1838 f
= fopen("/proc/cpuinfo", "r");
1844 if (!fgets(line
, sizeof(line
), f
)) {
1847 if (!strncmp(line
, field
, field_len
)) {
1848 pstrcpy(value
, len
, line
);
1859 uint32_t kvmppc_get_tbfreq(void)
1863 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1865 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1869 if (!(ns
= strchr(line
, ':'))) {
/* Read the host device tree's system-id into *value (caller frees with
 * g_free()).  Returns true on success. */
bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}
/* Read the host device tree's model string into *value (caller frees
 * with g_free()).  Returns true on success. */
bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
1889 /* Try to find a device tree node for a CPU with clock-frequency property */
1890 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1892 struct dirent
*dirp
;
1895 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1896 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1901 while ((dirp
= readdir(dp
)) != NULL
) {
1903 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1905 f
= fopen(buf
, "r");
1907 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1914 if (buf
[0] == '\0') {
1915 printf("Unknown host!\n");
1922 static uint64_t kvmppc_read_int_dt(const char *filename
)
1931 f
= fopen(filename
, "rb");
1936 len
= fread(&u
, 1, sizeof(u
), f
);
1940 /* property is a 32-bit quantity */
1941 return be32_to_cpu(u
.v32
);
1943 return be64_to_cpu(u
.v64
);
1949 /* Read a CPU node property from the host device tree that's a single
1950 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1951 * (can't find or open the property, or doesn't understand the
1953 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1955 char buf
[PATH_MAX
], *tmp
;
1958 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1962 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1963 val
= kvmppc_read_int_dt(tmp
);
/* Host CPU clock frequency from the device tree, 0 if unavailable. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host "ibm,vmx" property (Altivec/VSX level), 0 if unavailable. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host "ibm,dfp" property (decimal floating point), 0 if unavailable. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1984 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1986 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1987 CPUState
*cs
= CPU(cpu
);
1989 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1990 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1997 int kvmppc_get_hasidle(CPUPPCState
*env
)
1999 struct kvm_ppc_pvinfo pvinfo
;
2001 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
2002 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
2009 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
2011 uint32_t *hc
= (uint32_t*)buf
;
2012 struct kvm_ppc_pvinfo pvinfo
;
2014 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
2015 memcpy(buf
, pvinfo
.hcall
, buf_len
);
2020 * Fallback to always fail hypercalls regardless of endianness:
2022 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2024 * b .+8 (becomes nop in wrong endian)
2025 * bswap32(li r3, -1)
2028 hc
[0] = cpu_to_be32(0x08000048);
2029 hc
[1] = cpu_to_be32(0x3860ffff);
2030 hc
[2] = cpu_to_be32(0x48000008);
2031 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
2036 static inline int kvmppc_enable_hcall(KVMState
*s
, target_ulong hcall
)
2038 return kvm_vm_enable_cap(s
, KVM_CAP_PPC_ENABLE_HCALL
, 0, hcall
, 1);
2041 void kvmppc_enable_logical_ci_hcalls(void)
2044 * FIXME: it would be nice if we could detect the cases where
2045 * we're using a device which requires the in kernel
2046 * implementation of these hcalls, but the kernel lacks them and
2047 * produce a warning.
2049 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
2050 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
2053 void kvmppc_enable_set_mode_hcall(void)
2055 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
2058 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2060 CPUState
*cs
= CPU(cpu
);
2063 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2065 error_report("This vCPU type or KVM version does not support PAPR");
2069 /* Update the capability flag so we sync the right information
2074 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
2076 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
2079 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2081 CPUState
*cs
= CPU(cpu
);
2084 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2085 if (ret
&& mpic_proxy
) {
2086 error_report("This KVM version does not support EPR");
2091 int kvmppc_smt_threads(void)
2093 return cap_ppc_smt
? cap_ppc_smt
: 1;
2097 off_t
kvmppc_alloc_rma(void **rma
)
2101 struct kvm_allocate_rma ret
;
2103 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2104 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2105 * not necessary on this hardware
2106 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2108 * FIXME: We should allow the user to force contiguous RMA
2109 * allocation in the cap_ppc_rma==1 case.
2111 if (cap_ppc_rma
< 2) {
2115 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2117 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2122 size
= MIN(ret
.rma_size
, 256ul << 20);
2124 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2125 if (*rma
== MAP_FAILED
) {
2126 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2133 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2135 struct kvm_ppc_smmu_info info
;
2136 long rampagesize
, best_page_shift
;
2139 if (cap_ppc_rma
>= 2) {
2140 return current_size
;
2143 /* Find the largest hardware supported page size that's less than
2144 * or equal to the (logical) backing page size of guest RAM */
2145 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2146 rampagesize
= getrampagesize();
2147 best_page_shift
= 0;
2149 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2150 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2152 if (!sps
->page_shift
) {
2156 if ((sps
->page_shift
> best_page_shift
)
2157 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2158 best_page_shift
= sps
->page_shift
;
2162 return MIN(current_size
,
2163 1ULL << (best_page_shift
+ hash_shift
- 7));
2167 bool kvmppc_spapr_use_multitce(void)
2169 return cap_spapr_multitce
;
2172 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2175 struct kvm_create_spapr_tce args
= {
2177 .window_size
= window_size
,
2183 /* Must set fd to -1 so we don't try to munmap when called for
2184 * destroying the table, which the upper layers -will- do
2187 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2191 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2193 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2198 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2199 /* FIXME: round this up to page size */
2201 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2202 if (table
== MAP_FAILED
) {
2203 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2213 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2221 len
= nb_table
* sizeof(uint64_t);
2222 if ((munmap(table
, len
) < 0) ||
2224 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2226 /* Leak the table */
2232 int kvmppc_reset_htab(int shift_hint
)
2234 uint32_t shift
= shift_hint
;
2236 if (!kvm_enabled()) {
2237 /* Full emulation, tell caller to allocate htab itself */
2240 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2242 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2243 if (ret
== -ENOTTY
) {
2244 /* At least some versions of PR KVM advertise the
2245 * capability, but don't implement the ioctl(). Oops.
2246 * Return 0 so that we allocate the htab in qemu, as is
2247 * correct for PR. */
2249 } else if (ret
< 0) {
2255 /* We have a kernel that predates the htab reset calls. For PR
2256 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2257 * this era, it has allocated a 16MB fixed size hash table
2258 * already. Kernels of this era have the GET_PVINFO capability
2259 * only on PR, so we use this hack to determine the right
2261 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
2262 /* PR - tell caller to allocate htab */
2265 /* HV - assume 16MB kernel allocated htab */
2270 static inline uint32_t mfpvr(void)
2279 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2288 static void kvmppc_host_cpu_initfn(Object
*obj
)
2290 assert(kvm_enabled());
2293 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2295 DeviceClass
*dc
= DEVICE_CLASS(oc
);
2296 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2297 uint32_t vmx
= kvmppc_get_vmx();
2298 uint32_t dfp
= kvmppc_get_dfp();
2299 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2300 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2302 /* Now fix up the class with information we can query from the host */
2306 /* Only override when we know what the host supports */
2307 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2308 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2311 /* Only override when we know what the host supports */
2312 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2315 if (dcache_size
!= -1) {
2316 pcc
->l1_dcache_size
= dcache_size
;
2319 if (icache_size
!= -1) {
2320 pcc
->l1_icache_size
= icache_size
;
2323 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2324 dc
->cannot_destroy_with_object_finalize_yet
= true;
2327 bool kvmppc_has_cap_epr(void)
2332 bool kvmppc_has_cap_htab_fd(void)
2337 bool kvmppc_has_cap_fixup_hcalls(void)
2339 return cap_fixup_hcalls
;
2342 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2344 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2346 while (oc
&& !object_class_is_abstract(oc
)) {
2347 oc
= object_class_get_parent(oc
);
2351 return POWERPC_CPU_CLASS(oc
);
2354 PowerPCCPUClass
*kvm_ppc_get_host_cpu_class(void)
2356 uint32_t host_pvr
= mfpvr();
2357 PowerPCCPUClass
*pvr_pcc
;
2359 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2360 if (pvr_pcc
== NULL
) {
2361 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2367 #if defined(TARGET_PPC64)
2368 static void spapr_cpu_core_host_initfn(Object
*obj
)
2370 sPAPRCPUCore
*core
= SPAPR_CPU_CORE(obj
);
2371 char *name
= g_strdup_printf("%s-" TYPE_POWERPC_CPU
, "host");
2372 ObjectClass
*oc
= object_class_by_name(name
);
2375 g_free((void *)name
);
2376 core
->cpu_class
= oc
;
2380 static int kvm_ppc_register_host_cpu_type(void)
2382 TypeInfo type_info
= {
2383 .name
= TYPE_HOST_POWERPC_CPU
,
2384 .instance_init
= kvmppc_host_cpu_initfn
,
2385 .class_init
= kvmppc_host_cpu_class_init
,
2387 PowerPCCPUClass
*pvr_pcc
;
2390 pvr_pcc
= kvm_ppc_get_host_cpu_class();
2391 if (pvr_pcc
== NULL
) {
2394 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2395 type_register(&type_info
);
2397 #if defined(TARGET_PPC64)
2398 type_info
.name
= g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE
, "host");
2399 type_info
.parent
= TYPE_SPAPR_CPU_CORE
,
2400 type_info
.instance_size
= sizeof(sPAPRCPUCore
),
2401 type_info
.instance_init
= spapr_cpu_core_host_initfn
,
2402 type_info
.class_init
= NULL
;
2403 type_register(&type_info
);
2404 g_free((void *)type_info
.name
);
2405 type_info
.instance_size
= 0;
2406 type_info
.instance_init
= NULL
;
2409 /* Register generic family CPU class for a family */
2410 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2411 dc
= DEVICE_CLASS(pvr_pcc
);
2412 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2413 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2414 type_register(&type_info
);
2419 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2421 struct kvm_rtas_token_args args
= {
2425 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2429 strncpy(args
.name
, function
, sizeof(args
.name
));
2431 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2434 int kvmppc_get_htab_fd(bool write
)
2436 struct kvm_get_htab_fd s
= {
2437 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2442 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2446 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2449 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2451 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2452 uint8_t buf
[bufsize
];
2456 rc
= read(fd
, buf
, bufsize
);
2458 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2462 uint8_t *buffer
= buf
;
2465 struct kvm_get_htab_header
*head
=
2466 (struct kvm_get_htab_header
*) buffer
;
2467 size_t chunksize
= sizeof(*head
) +
2468 HASH_PTE_SIZE_64
* head
->n_valid
;
2470 qemu_put_be32(f
, head
->index
);
2471 qemu_put_be16(f
, head
->n_valid
);
2472 qemu_put_be16(f
, head
->n_invalid
);
2473 qemu_put_buffer(f
, (void *)(head
+ 1),
2474 HASH_PTE_SIZE_64
* head
->n_valid
);
2476 buffer
+= chunksize
;
2482 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2484 return (rc
== 0) ? 1 : 0;
2487 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2488 uint16_t n_valid
, uint16_t n_invalid
)
2490 struct kvm_get_htab_header
*buf
;
2491 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2494 buf
= alloca(chunksize
);
2496 buf
->n_valid
= n_valid
;
2497 buf
->n_invalid
= n_invalid
;
2499 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2501 rc
= write(fd
, buf
, chunksize
);
2503 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2507 if (rc
!= chunksize
) {
2508 /* We should never get a short write on a single chunk */
2509 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2515 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2520 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2525 int kvm_arch_on_sigbus(int code
, void *addr
)
2530 void kvm_arch_init_irq_routing(KVMState
*s
)
2534 struct kvm_get_htab_buf
{
2535 struct kvm_get_htab_header header
;
2537 * We require one extra byte for read
2539 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2542 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2545 struct kvm_get_htab_fd ghf
;
2546 struct kvm_get_htab_buf
*hpte_buf
;
2549 ghf
.start_index
= pte_index
;
2550 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2555 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2557 * Read the hpte group
2559 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2564 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2573 void kvmppc_hash64_free_pteg(uint64_t token
)
2575 struct kvm_get_htab_buf
*htab_buf
;
2577 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2583 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2584 target_ulong pte0
, target_ulong pte1
)
2587 struct kvm_get_htab_fd ghf
;
2588 struct kvm_get_htab_buf hpte_buf
;
2591 ghf
.start_index
= 0; /* Ignored */
2592 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2597 hpte_buf
.header
.n_valid
= 1;
2598 hpte_buf
.header
.n_invalid
= 0;
2599 hpte_buf
.header
.index
= pte_index
;
2600 hpte_buf
.hpte
[0] = pte0
;
2601 hpte_buf
.hpte
[1] = pte1
;
2603 * Write the hpte entry.
2604 * CAUTION: write() has the warn_unused_result attribute. Hence we
2605 * need to check the return value, even though we do nothing.
2607 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2619 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2620 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
2625 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry
*route
,
2626 int vector
, PCIDevice
*dev
)
2631 int kvm_arch_release_virq_post(int virq
)
/* Map MSI data to a GSI number: the low 16 bits carry the interrupt
 * number on PPC. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2641 int kvmppc_enable_hwrng(void)
2643 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2647 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);