/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
/*
 * Debug tracing macro: with DEBUG_KVM defined the format string goes to
 * stderr, otherwise it compiles away to nothing.  The extraction lost the
 * conditional, leaving two competing unconditional definitions; restore
 * the standard #ifdef guard so only one definition is ever active.
 */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
67 static int cap_interrupt_unset
= false;
68 static int cap_interrupt_level
= false;
69 static int cap_segstate
;
70 static int cap_booke_sregs
;
71 static int cap_ppc_smt
;
72 static int cap_ppc_rma
;
73 static int cap_spapr_tce
;
74 static int cap_spapr_multitce
;
75 static int cap_spapr_vfio
;
77 static int cap_one_reg
;
79 static int cap_ppc_watchdog
;
81 static int cap_htab_fd
;
82 static int cap_fixup_hcalls
;
84 static uint32_t debug_inst_opcode
;
86 /* XXX We have a race condition where we actually have a level triggered
87 * interrupt, but the infrastructure can't expose that yet, so the guest
88 * takes but ignores it, goes to sleep and never gets notified that there's
89 * still an interrupt pending.
91 * As a quick workaround, let's just wake up again 20 ms after we injected
92 * an interrupt. That way we can assure that we're always reinjecting
93 * interrupts in case the guest swallowed them.
95 static QEMUTimer
*idle_timer
;
97 static void kvm_kick_cpu(void *opaque
)
99 PowerPCCPU
*cpu
= opaque
;
101 qemu_cpu_kick(CPU(cpu
));
104 static int kvm_ppc_register_host_cpu_type(void);
106 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
108 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
109 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
110 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
111 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
112 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
113 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
114 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
115 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
116 cap_spapr_vfio
= false;
117 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
118 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
119 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
120 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
121 /* Note: we don't set cap_papr here, because this capability is
122 * only activated after this by kvmppc_set_papr() */
123 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
124 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
126 if (!cap_interrupt_level
) {
127 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
128 "VM to stall at times!\n");
131 kvm_ppc_register_host_cpu_type();
136 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
138 CPUPPCState
*cenv
= &cpu
->env
;
139 CPUState
*cs
= CPU(cpu
);
140 struct kvm_sregs sregs
;
143 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
144 /* What we're really trying to say is "if we're on BookE, we use
145 the native PVR for now". This is the only sane way to check
146 it though, so we potentially confuse users that they can run
147 BookE guests on BookS. Let's hope nobody dares enough :) */
151 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
156 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
161 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
162 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
165 /* Set up a shared TLB array with KVM */
166 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
168 CPUPPCState
*env
= &cpu
->env
;
169 CPUState
*cs
= CPU(cpu
);
170 struct kvm_book3e_206_tlb_params params
= {};
171 struct kvm_config_tlb cfg
= {};
172 unsigned int entries
= 0;
175 if (!kvm_enabled() ||
176 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
180 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
182 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
183 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
184 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
185 entries
+= params
.tlb_sizes
[i
];
188 assert(entries
== env
->nb_tlb
);
189 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
191 env
->tlb_dirty
= true;
193 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
194 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
195 cfg
.params
= (uintptr_t)¶ms
;
196 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
198 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
200 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
201 __func__
, strerror(-ret
));
205 env
->kvm_sw_tlb
= true;
210 #if defined(TARGET_PPC64)
211 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
212 struct kvm_ppc_smmu_info
*info
)
214 CPUPPCState
*env
= &cpu
->env
;
215 CPUState
*cs
= CPU(cpu
);
217 memset(info
, 0, sizeof(*info
));
219 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
220 * need to "guess" what the supported page sizes are.
222 * For that to work we make a few assumptions:
224 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
225 * KVM which only supports 4K and 16M pages, but supports them
226 * regardless of the backing store characteritics. We also don't
227 * support 1T segments.
229 * This is safe as if HV KVM ever supports that capability or PR
230 * KVM grows supports for more page/segment sizes, those versions
231 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
232 * will not hit this fallback
234 * - Else we are running HV KVM. This means we only support page
235 * sizes that fit in the backing store. Additionally we only
236 * advertize 64K pages if the processor is ARCH 2.06 and we assume
237 * P7 encodings for the SLB and hash table. Here too, we assume
238 * support for any newer processor will mean a kernel that
239 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
242 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
247 /* Standard 4k base page size segment */
248 info
->sps
[0].page_shift
= 12;
249 info
->sps
[0].slb_enc
= 0;
250 info
->sps
[0].enc
[0].page_shift
= 12;
251 info
->sps
[0].enc
[0].pte_enc
= 0;
253 /* Standard 16M large page size segment */
254 info
->sps
[1].page_shift
= 24;
255 info
->sps
[1].slb_enc
= SLB_VSID_L
;
256 info
->sps
[1].enc
[0].page_shift
= 24;
257 info
->sps
[1].enc
[0].pte_enc
= 0;
261 /* HV KVM has backing store size restrictions */
262 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
264 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
265 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
268 if (env
->mmu_model
== POWERPC_MMU_2_06
||
269 env
->mmu_model
== POWERPC_MMU_2_07
) {
275 /* Standard 4k base page size segment */
276 info
->sps
[i
].page_shift
= 12;
277 info
->sps
[i
].slb_enc
= 0;
278 info
->sps
[i
].enc
[0].page_shift
= 12;
279 info
->sps
[i
].enc
[0].pte_enc
= 0;
282 /* 64K on MMU 2.06 and later */
283 if (env
->mmu_model
== POWERPC_MMU_2_06
||
284 env
->mmu_model
== POWERPC_MMU_2_07
) {
285 info
->sps
[i
].page_shift
= 16;
286 info
->sps
[i
].slb_enc
= 0x110;
287 info
->sps
[i
].enc
[0].page_shift
= 16;
288 info
->sps
[i
].enc
[0].pte_enc
= 1;
292 /* Standard 16M large page size segment */
293 info
->sps
[i
].page_shift
= 24;
294 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
295 info
->sps
[i
].enc
[0].page_shift
= 24;
296 info
->sps
[i
].enc
[0].pte_enc
= 0;
300 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
302 CPUState
*cs
= CPU(cpu
);
305 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
306 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
312 kvm_get_fallback_smmu_info(cpu
, info
);
/*
 * Return the page size backing mem_path.
 *
 * statfs() the path (retrying across EINTR); if the filesystem is
 * hugetlbfs, its block size is the huge page size, otherwise the path
 * is backed by ordinary pages and we return getpagesize().
 * A statfs() failure is fatal (exit(1)) — we cannot size guest memory
 * without knowing the backing page size.
 */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
342 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
343 * may or may not name the same files / on the same filesystem now as
344 * when we actually open and map them. Iterate over the file
345 * descriptors instead, and use qemu_fd_getpagesize().
347 static int find_max_supported_pagesize(Object
*obj
, void *opaque
)
350 long *hpsize_min
= opaque
;
352 if (object_dynamic_cast(obj
, TYPE_MEMORY_BACKEND
)) {
353 mem_path
= object_property_get_str(obj
, "mem-path", NULL
);
355 long hpsize
= gethugepagesize(mem_path
);
356 if (hpsize
< *hpsize_min
) {
357 *hpsize_min
= hpsize
;
360 *hpsize_min
= getpagesize();
367 static long getrampagesize(void)
369 long hpsize
= LONG_MAX
;
370 long mainrampagesize
;
374 mainrampagesize
= gethugepagesize(mem_path
);
376 mainrampagesize
= getpagesize();
379 /* it's possible we have memory-backend objects with
380 * hugepage-backed RAM. these may get mapped into system
381 * address space via -numa parameters or memory hotplug
382 * hooks. we want to take these into account, but we
383 * also want to make sure these supported hugepage
384 * sizes are applicable across the entire range of memory
385 * we may boot from, so we take the min across all
386 * backends, and assume normal pages in cases where a
387 * backend isn't backed by hugepages.
389 memdev_root
= object_resolve_path("/objects", NULL
);
391 object_child_foreach(memdev_root
, find_max_supported_pagesize
, &hpsize
);
393 if (hpsize
== LONG_MAX
) {
394 /* No additional memory regions found ==> Report main RAM page size */
395 return mainrampagesize
;
398 /* If NUMA is disabled or the NUMA nodes are not backed with a
399 * memory-backend, then there is at least one node using "normal" RAM,
400 * so if its page size is smaller we have got to report that size instead.
402 if (hpsize
> mainrampagesize
&&
403 (nb_numa_nodes
== 0 || numa_info
[0].node_memdev
== NULL
)) {
406 error_report("Huge page support disabled (n/a for main memory).");
409 return mainrampagesize
;
415 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
417 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
421 return (1ul << shift
) <= rampgsize
;
424 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
426 static struct kvm_ppc_smmu_info smmu_info
;
427 static bool has_smmu_info
;
428 CPUPPCState
*env
= &cpu
->env
;
431 bool has_64k_pages
= false;
433 /* We only handle page sizes for 64-bit server guests for now */
434 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
438 /* Collect MMU info from kernel if not already */
439 if (!has_smmu_info
) {
440 kvm_get_smmu_info(cpu
, &smmu_info
);
441 has_smmu_info
= true;
444 rampagesize
= getrampagesize();
446 /* Convert to QEMU form */
447 memset(&env
->sps
, 0, sizeof(env
->sps
));
449 /* If we have HV KVM, we need to forbid CI large pages if our
450 * host page size is smaller than 64K.
452 if (smmu_info
.flags
& KVM_PPC_PAGE_SIZES_REAL
) {
453 env
->ci_large_pages
= getpagesize() >= 0x10000;
457 * XXX This loop should be an entry wide AND of the capabilities that
458 * the selected CPU has with the capabilities that KVM supports.
460 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
461 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
462 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
464 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
468 qsps
->page_shift
= ksps
->page_shift
;
469 qsps
->slb_enc
= ksps
->slb_enc
;
470 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
471 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
472 ksps
->enc
[jk
].page_shift
)) {
475 if (ksps
->enc
[jk
].page_shift
== 16) {
476 has_64k_pages
= true;
478 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
479 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
480 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
484 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
488 env
->slb_nr
= smmu_info
.slb_size
;
489 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
490 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
492 if (!has_64k_pages
) {
493 env
->mmu_model
&= ~POWERPC_MMU_64K
;
496 #else /* defined (TARGET_PPC64) */
498 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
502 #endif /* !defined (TARGET_PPC64) */
504 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
506 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
509 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
510 * book3s supports only 1 watchpoint, so array size
511 * of 4 is sufficient for now.
513 #define MAX_HW_BKPTS 4
515 static struct HWBreakpoint
{
518 } hw_debug_points
[MAX_HW_BKPTS
];
520 static CPUWatchpoint hw_watchpoint
;
522 /* Default there is no breakpoint and watchpoint supported */
523 static int max_hw_breakpoint
;
524 static int max_hw_watchpoint
;
525 static int nb_hw_breakpoint
;
526 static int nb_hw_watchpoint
;
528 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
530 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
531 max_hw_breakpoint
= 2;
532 max_hw_watchpoint
= 2;
535 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
536 fprintf(stderr
, "Error initializing h/w breakpoints\n");
541 int kvm_arch_init_vcpu(CPUState
*cs
)
543 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
544 CPUPPCState
*cenv
= &cpu
->env
;
547 /* Gather server mmu info from KVM and update the CPU state */
548 kvm_fixup_page_sizes(cpu
);
550 /* Synchronize sregs with kvm */
551 ret
= kvm_arch_sync_sregs(cpu
);
553 if (ret
== -EINVAL
) {
554 error_report("Register sync failed... If you're using kvm-hv.ko,"
555 " only \"-cpu host\" is possible");
560 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
562 /* Some targets support access to KVM's guest TLB. */
563 switch (cenv
->mmu_model
) {
564 case POWERPC_MMU_BOOKE206
:
565 ret
= kvm_booke206_tlb_init(cpu
);
571 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
572 kvmppc_hw_debug_points_init(cenv
);
577 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
579 CPUPPCState
*env
= &cpu
->env
;
580 CPUState
*cs
= CPU(cpu
);
581 struct kvm_dirty_tlb dirty_tlb
;
582 unsigned char *bitmap
;
585 if (!env
->kvm_sw_tlb
) {
589 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
590 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
592 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
593 dirty_tlb
.num_dirty
= env
->nb_tlb
;
595 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
597 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
598 __func__
, strerror(-ret
));
604 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
606 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
607 CPUPPCState
*env
= &cpu
->env
;
612 struct kvm_one_reg reg
= {
614 .addr
= (uintptr_t) &val
,
618 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
620 trace_kvm_failed_spr_get(spr
, strerror(errno
));
622 switch (id
& KVM_REG_SIZE_MASK
) {
623 case KVM_REG_SIZE_U32
:
624 env
->spr
[spr
] = val
.u32
;
627 case KVM_REG_SIZE_U64
:
628 env
->spr
[spr
] = val
.u64
;
632 /* Don't handle this size yet */
638 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
640 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
641 CPUPPCState
*env
= &cpu
->env
;
646 struct kvm_one_reg reg
= {
648 .addr
= (uintptr_t) &val
,
652 switch (id
& KVM_REG_SIZE_MASK
) {
653 case KVM_REG_SIZE_U32
:
654 val
.u32
= env
->spr
[spr
];
657 case KVM_REG_SIZE_U64
:
658 val
.u64
= env
->spr
[spr
];
662 /* Don't handle this size yet */
666 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
668 trace_kvm_failed_spr_set(spr
, strerror(errno
));
672 static int kvm_put_fp(CPUState
*cs
)
674 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
675 CPUPPCState
*env
= &cpu
->env
;
676 struct kvm_one_reg reg
;
680 if (env
->insns_flags
& PPC_FLOAT
) {
681 uint64_t fpscr
= env
->fpscr
;
682 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
684 reg
.id
= KVM_REG_PPC_FPSCR
;
685 reg
.addr
= (uintptr_t)&fpscr
;
686 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
688 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
692 for (i
= 0; i
< 32; i
++) {
695 #ifdef HOST_WORDS_BIGENDIAN
696 vsr
[0] = float64_val(env
->fpr
[i
]);
697 vsr
[1] = env
->vsr
[i
];
699 vsr
[0] = env
->vsr
[i
];
700 vsr
[1] = float64_val(env
->fpr
[i
]);
702 reg
.addr
= (uintptr_t) &vsr
;
703 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
705 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
707 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
714 if (env
->insns_flags
& PPC_ALTIVEC
) {
715 reg
.id
= KVM_REG_PPC_VSCR
;
716 reg
.addr
= (uintptr_t)&env
->vscr
;
717 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
719 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
723 for (i
= 0; i
< 32; i
++) {
724 reg
.id
= KVM_REG_PPC_VR(i
);
725 reg
.addr
= (uintptr_t)&env
->avr
[i
];
726 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
728 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
737 static int kvm_get_fp(CPUState
*cs
)
739 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
740 CPUPPCState
*env
= &cpu
->env
;
741 struct kvm_one_reg reg
;
745 if (env
->insns_flags
& PPC_FLOAT
) {
747 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
749 reg
.id
= KVM_REG_PPC_FPSCR
;
750 reg
.addr
= (uintptr_t)&fpscr
;
751 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
753 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
759 for (i
= 0; i
< 32; i
++) {
762 reg
.addr
= (uintptr_t) &vsr
;
763 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
765 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
767 DPRINTF("Unable to get %s%d from KVM: %s\n",
768 vsx
? "VSR" : "FPR", i
, strerror(errno
));
771 #ifdef HOST_WORDS_BIGENDIAN
772 env
->fpr
[i
] = vsr
[0];
774 env
->vsr
[i
] = vsr
[1];
777 env
->fpr
[i
] = vsr
[1];
779 env
->vsr
[i
] = vsr
[0];
786 if (env
->insns_flags
& PPC_ALTIVEC
) {
787 reg
.id
= KVM_REG_PPC_VSCR
;
788 reg
.addr
= (uintptr_t)&env
->vscr
;
789 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
791 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
795 for (i
= 0; i
< 32; i
++) {
796 reg
.id
= KVM_REG_PPC_VR(i
);
797 reg
.addr
= (uintptr_t)&env
->avr
[i
];
798 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
800 DPRINTF("Unable to get VR%d from KVM: %s\n",
810 #if defined(TARGET_PPC64)
811 static int kvm_get_vpa(CPUState
*cs
)
813 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
814 CPUPPCState
*env
= &cpu
->env
;
815 struct kvm_one_reg reg
;
818 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
819 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
820 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
822 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
826 assert((uintptr_t)&env
->slb_shadow_size
827 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
828 reg
.id
= KVM_REG_PPC_VPA_SLB
;
829 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
830 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
832 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
837 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
838 reg
.id
= KVM_REG_PPC_VPA_DTL
;
839 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
840 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
842 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
850 static int kvm_put_vpa(CPUState
*cs
)
852 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
853 CPUPPCState
*env
= &cpu
->env
;
854 struct kvm_one_reg reg
;
857 /* SLB shadow or DTL can't be registered unless a master VPA is
858 * registered. That means when restoring state, if a VPA *is*
859 * registered, we need to set that up first. If not, we need to
860 * deregister the others before deregistering the master VPA */
861 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
864 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
865 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
866 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
868 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
873 assert((uintptr_t)&env
->slb_shadow_size
874 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
875 reg
.id
= KVM_REG_PPC_VPA_SLB
;
876 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
877 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
879 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
883 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
884 reg
.id
= KVM_REG_PPC_VPA_DTL
;
885 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
886 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
888 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
893 if (!env
->vpa_addr
) {
894 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
895 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
896 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
898 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
905 #endif /* TARGET_PPC64 */
907 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
909 CPUPPCState
*env
= &cpu
->env
;
910 struct kvm_sregs sregs
;
913 sregs
.pvr
= env
->spr
[SPR_PVR
];
915 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
919 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
920 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
921 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
922 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
924 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
929 for (i
= 0; i
< 16; i
++) {
930 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
934 for (i
= 0; i
< 8; i
++) {
935 /* Beware. We have to swap upper and lower bits here */
936 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
938 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
942 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
945 int kvm_arch_put_registers(CPUState
*cs
, int level
)
947 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
948 CPUPPCState
*env
= &cpu
->env
;
949 struct kvm_regs regs
;
953 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
960 regs
.xer
= cpu_read_xer(env
);
964 regs
.srr0
= env
->spr
[SPR_SRR0
];
965 regs
.srr1
= env
->spr
[SPR_SRR1
];
967 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
968 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
969 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
970 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
971 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
972 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
973 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
974 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
976 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
978 for (i
= 0;i
< 32; i
++)
979 regs
.gpr
[i
] = env
->gpr
[i
];
982 for (i
= 0; i
< 8; i
++) {
983 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
986 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
992 if (env
->tlb_dirty
) {
994 env
->tlb_dirty
= false;
997 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
998 ret
= kvmppc_put_books_sregs(cpu
);
1004 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
1005 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1011 /* We deliberately ignore errors here, for kernels which have
1012 * the ONE_REG calls, but don't support the specific
1013 * registers, there's a reasonable chance things will still
1014 * work, at least until we try to migrate. */
1015 for (i
= 0; i
< 1024; i
++) {
1016 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1019 kvm_put_one_spr(cs
, id
, i
);
1025 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1026 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1028 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1029 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1031 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1032 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1033 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1034 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1035 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1036 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1037 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1038 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1039 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1040 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1044 if (kvm_put_vpa(cs
) < 0) {
1045 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1049 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1050 #endif /* TARGET_PPC64 */
1056 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1058 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1061 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1063 CPUPPCState
*env
= &cpu
->env
;
1064 struct kvm_sregs sregs
;
1067 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1072 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1073 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1074 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1075 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1076 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1077 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1078 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1079 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1080 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1081 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1082 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1083 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1086 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1087 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1088 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1089 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1090 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1091 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1094 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1095 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1098 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1099 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1102 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1103 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1104 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1105 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1106 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1107 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1108 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1109 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1110 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1111 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1112 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1113 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1114 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1115 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1116 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1117 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1118 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1119 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1120 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1121 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1122 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1123 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1124 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1125 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1126 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1127 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1128 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1129 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1130 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1131 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1132 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1133 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1134 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1136 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1137 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1138 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1139 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1140 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1141 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1142 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1145 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1146 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1147 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1150 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1151 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1152 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1153 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1154 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1158 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1159 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1160 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1161 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1162 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1163 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1164 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1165 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1166 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1167 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1168 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1171 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1172 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1175 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1176 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1177 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1180 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1181 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1182 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1183 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1185 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1186 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1187 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1194 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1196 CPUPPCState
*env
= &cpu
->env
;
1197 struct kvm_sregs sregs
;
1201 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1206 if (!env
->external_htab
) {
1207 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1213 * The packed SLB array we get from KVM_GET_SREGS only contains
1214 * information about valid entries. So we flush our internal copy
1215 * to get rid of stale ones, then put all valid SLB entries back
1218 memset(env
->slb
, 0, sizeof(env
->slb
));
1219 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1220 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1221 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1223 * Only restore valid entries
1225 if (rb
& SLB_ESID_V
) {
1226 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1232 for (i
= 0; i
< 16; i
++) {
1233 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1237 for (i
= 0; i
< 8; i
++) {
1238 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1239 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1240 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1241 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1247 int kvm_arch_get_registers(CPUState
*cs
)
1249 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1250 CPUPPCState
*env
= &cpu
->env
;
1251 struct kvm_regs regs
;
1255 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1260 for (i
= 7; i
>= 0; i
--) {
1261 env
->crf
[i
] = cr
& 15;
1265 env
->ctr
= regs
.ctr
;
1267 cpu_write_xer(env
, regs
.xer
);
1268 env
->msr
= regs
.msr
;
1271 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1272 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1274 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1275 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1276 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1277 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1278 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1279 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1280 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1281 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1283 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1285 for (i
= 0;i
< 32; i
++)
1286 env
->gpr
[i
] = regs
.gpr
[i
];
1290 if (cap_booke_sregs
) {
1291 ret
= kvmppc_get_booke_sregs(cpu
);
1298 ret
= kvmppc_get_books_sregs(cpu
);
1305 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1311 /* We deliberately ignore errors here, for kernels which have
1312 * the ONE_REG calls, but don't support the specific
1313 * registers, there's a reasonable chance things will still
1314 * work, at least until we try to migrate. */
1315 for (i
= 0; i
< 1024; i
++) {
1316 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1319 kvm_get_one_spr(cs
, id
, i
);
1325 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1326 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1328 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1329 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1331 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1332 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1333 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1334 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1335 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1336 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1337 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1338 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1339 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1340 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1344 if (kvm_get_vpa(cs
) < 0) {
1345 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1349 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1356 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1358 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1360 if (irq
!= PPC_INTERRUPT_EXT
) {
1364 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1368 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1373 #if defined(TARGET_PPCEMB)
1374 #define PPC_INPUT_INT PPC40x_INPUT_INT
1375 #elif defined(TARGET_PPC64)
1376 #define PPC_INPUT_INT PPC970_INPUT_INT
1378 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1381 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1383 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1384 CPUPPCState
*env
= &cpu
->env
;
1388 qemu_mutex_lock_iothread();
1390 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1391 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1392 if (!cap_interrupt_level
&&
1393 run
->ready_for_interrupt_injection
&&
1394 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1395 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1397 /* For now KVM disregards the 'irq' argument. However, in the
1398 * future KVM could cache it in-kernel to avoid a heavyweight exit
1399 * when reading the UIC.
1401 irq
= KVM_INTERRUPT_SET
;
1403 DPRINTF("injected interrupt %d\n", irq
);
1404 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1406 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1409 /* Always wake up soon in case the interrupt was level based */
1410 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1411 (NANOSECONDS_PER_SECOND
/ 50));
1414 /* We don't know if there are more interrupts pending after this. However,
1415 * the guest will return to userspace in the course of handling this one
1416 * anyways, so we will get a chance to deliver the rest. */
1418 qemu_mutex_unlock_iothread();
1421 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1423 return MEMTXATTRS_UNSPECIFIED
;
1426 int kvm_arch_process_async_events(CPUState
*cs
)
1431 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1433 CPUState
*cs
= CPU(cpu
);
1434 CPUPPCState
*env
= &cpu
->env
;
1436 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1438 cs
->exception_index
= EXCP_HLT
;
1444 /* map dcr access to existing qemu dcr emulation */
1445 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1447 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1448 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1453 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1455 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1456 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1461 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1463 /* Mixed endian case is not handled */
1464 uint32_t sc
= debug_inst_opcode
;
1466 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1468 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1475 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1479 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1480 sc
!= debug_inst_opcode
||
1481 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1489 static int find_hw_breakpoint(target_ulong addr
, int type
)
1493 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1494 <= ARRAY_SIZE(hw_debug_points
));
1496 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1497 if (hw_debug_points
[n
].addr
== addr
&&
1498 hw_debug_points
[n
].type
== type
) {
1506 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1510 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1512 *flag
= BP_MEM_ACCESS
;
1516 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1518 *flag
= BP_MEM_WRITE
;
1522 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1524 *flag
= BP_MEM_READ
;
1531 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1532 target_ulong len
, int type
)
1534 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1538 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1539 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1542 case GDB_BREAKPOINT_HW
:
1543 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1547 if (find_hw_breakpoint(addr
, type
) >= 0) {
1554 case GDB_WATCHPOINT_WRITE
:
1555 case GDB_WATCHPOINT_READ
:
1556 case GDB_WATCHPOINT_ACCESS
:
1557 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1561 if (find_hw_breakpoint(addr
, type
) >= 0) {
1575 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1576 target_ulong len
, int type
)
1580 n
= find_hw_breakpoint(addr
, type
);
1586 case GDB_BREAKPOINT_HW
:
1590 case GDB_WATCHPOINT_WRITE
:
1591 case GDB_WATCHPOINT_READ
:
1592 case GDB_WATCHPOINT_ACCESS
:
1599 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1604 void kvm_arch_remove_all_hw_breakpoints(void)
1606 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1609 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1613 /* Software Breakpoint updates */
1614 if (kvm_sw_breakpoints_active(cs
)) {
1615 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1618 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1619 <= ARRAY_SIZE(hw_debug_points
));
1620 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1622 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1623 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1624 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1625 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1626 switch (hw_debug_points
[n
].type
) {
1627 case GDB_BREAKPOINT_HW
:
1628 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1630 case GDB_WATCHPOINT_WRITE
:
1631 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1633 case GDB_WATCHPOINT_READ
:
1634 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1636 case GDB_WATCHPOINT_ACCESS
:
1637 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1638 KVMPPC_DEBUG_WATCH_READ
;
1641 cpu_abort(cs
, "Unsupported breakpoint type\n");
1643 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1648 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1650 CPUState
*cs
= CPU(cpu
);
1651 CPUPPCState
*env
= &cpu
->env
;
1652 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1657 if (cs
->singlestep_enabled
) {
1659 } else if (arch_info
->status
) {
1660 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1661 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1662 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1666 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1667 KVMPPC_DEBUG_WATCH_WRITE
)) {
1668 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1671 cs
->watchpoint_hit
= &hw_watchpoint
;
1672 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1673 hw_watchpoint
.flags
= flag
;
1677 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1680 /* QEMU is not able to handle debug exception, so inject
1681 * program exception to guest;
1682 * Yes program exception NOT debug exception !!
1683 * When QEMU is using debug resources then debug exception must
1684 * be always set. To achieve this we set MSR_DE and also set
1685 * MSRP_DEP so guest cannot change MSR_DE.
1686 * When emulating debug resource for guest we want guest
1687 * to control MSR_DE (enable/disable debug interrupt on need).
1688 * Supporting both configurations are NOT possible.
1689 * So the result is that we cannot share debug resources
1690 * between QEMU and Guest on BOOKE architecture.
1691 * In the current design QEMU gets the priority over guest,
1692 * this means that if QEMU is using debug resources then guest
1694 * For software breakpoint QEMU uses a privileged instruction;
1695 * So there cannot be any reason that we are here for guest
1696 * set debug exception, only possibility is guest executed a
1697 * privileged / illegal instruction and that's why we are
1698 * injecting a program interrupt.
1701 cpu_synchronize_state(cs
);
1702 /* env->nip is PC, so increment this by 4 to use
1703 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1706 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1707 env
->error_code
= POWERPC_EXCP_INVAL
;
1708 ppc_cpu_do_interrupt(cs
);
1714 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1716 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1717 CPUPPCState
*env
= &cpu
->env
;
1720 qemu_mutex_lock_iothread();
1722 switch (run
->exit_reason
) {
1724 if (run
->dcr
.is_write
) {
1725 DPRINTF("handle dcr write\n");
1726 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1728 DPRINTF("handle dcr read\n");
1729 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1733 DPRINTF("handle halt\n");
1734 ret
= kvmppc_handle_halt(cpu
);
1736 #if defined(TARGET_PPC64)
1737 case KVM_EXIT_PAPR_HCALL
:
1738 DPRINTF("handle PAPR hypercall\n");
1739 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1741 run
->papr_hcall
.args
);
1746 DPRINTF("handle epr\n");
1747 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1750 case KVM_EXIT_WATCHDOG
:
1751 DPRINTF("handle watchdog expiry\n");
1752 watchdog_perform_action();
1756 case KVM_EXIT_DEBUG
:
1757 DPRINTF("handle debug exception\n");
1758 if (kvm_handle_debug(cpu
, run
)) {
1762 /* re-enter, this exception was guest-internal */
1767 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1772 qemu_mutex_unlock_iothread();
1776 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1778 CPUState
*cs
= CPU(cpu
);
1779 uint32_t bits
= tsr_bits
;
1780 struct kvm_one_reg reg
= {
1781 .id
= KVM_REG_PPC_OR_TSR
,
1782 .addr
= (uintptr_t) &bits
,
1785 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1788 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1791 CPUState
*cs
= CPU(cpu
);
1792 uint32_t bits
= tsr_bits
;
1793 struct kvm_one_reg reg
= {
1794 .id
= KVM_REG_PPC_CLEAR_TSR
,
1795 .addr
= (uintptr_t) &bits
,
1798 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1801 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1803 CPUState
*cs
= CPU(cpu
);
1804 CPUPPCState
*env
= &cpu
->env
;
1805 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1807 struct kvm_one_reg reg
= {
1808 .id
= KVM_REG_PPC_TCR
,
1809 .addr
= (uintptr_t) &tcr
,
1812 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1815 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1817 CPUState
*cs
= CPU(cpu
);
1820 if (!kvm_enabled()) {
1824 if (!cap_ppc_watchdog
) {
1825 printf("warning: KVM does not support watchdog");
1829 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1831 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1832 __func__
, strerror(-ret
));
1839 static int read_cpuinfo(const char *field
, char *value
, int len
)
1843 int field_len
= strlen(field
);
1846 f
= fopen("/proc/cpuinfo", "r");
1852 if (!fgets(line
, sizeof(line
), f
)) {
1855 if (!strncmp(line
, field
, field_len
)) {
1856 pstrcpy(value
, len
, line
);
1867 uint32_t kvmppc_get_tbfreq(void)
1871 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1873 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1877 if (!(ns
= strchr(line
, ':'))) {
1886 bool kvmppc_get_host_serial(char **value
)
1888 return g_file_get_contents("/proc/device-tree/system-id", value
, NULL
,
1892 bool kvmppc_get_host_model(char **value
)
1894 return g_file_get_contents("/proc/device-tree/model", value
, NULL
, NULL
);
1897 /* Try to find a device tree node for a CPU with clock-frequency property */
1898 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1900 struct dirent
*dirp
;
1903 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1904 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1909 while ((dirp
= readdir(dp
)) != NULL
) {
1911 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1913 f
= fopen(buf
, "r");
1915 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1922 if (buf
[0] == '\0') {
1923 printf("Unknown host!\n");
1930 static uint64_t kvmppc_read_int_dt(const char *filename
)
1939 f
= fopen(filename
, "rb");
1944 len
= fread(&u
, 1, sizeof(u
), f
);
1948 /* property is a 32-bit quantity */
1949 return be32_to_cpu(u
.v32
);
1951 return be64_to_cpu(u
.v64
);
1957 /* Read a CPU node property from the host device tree that's a single
1958 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1959 * (can't find or open the property, or doesn't understand the
1961 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1963 char buf
[PATH_MAX
], *tmp
;
1966 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1970 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1971 val
= kvmppc_read_int_dt(tmp
);
/* Host CPU clock frequency from the device tree, 0 if unavailable. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host "ibm,vmx" (AltiVec/VSX level) property, 0 if unavailable. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host "ibm,dfp" (decimal FP support) property, 0 if unavailable. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1992 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1994 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1995 CPUState
*cs
= CPU(cpu
);
1997 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1998 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
2005 int kvmppc_get_hasidle(CPUPPCState
*env
)
2007 struct kvm_ppc_pvinfo pvinfo
;
2009 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
2010 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
2017 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
2019 uint32_t *hc
= (uint32_t*)buf
;
2020 struct kvm_ppc_pvinfo pvinfo
;
2022 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
2023 memcpy(buf
, pvinfo
.hcall
, buf_len
);
2028 * Fallback to always fail hypercalls regardless of endianness:
2030 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2032 * b .+8 (becomes nop in wrong endian)
2033 * bswap32(li r3, -1)
2036 hc
[0] = cpu_to_be32(0x08000048);
2037 hc
[1] = cpu_to_be32(0x3860ffff);
2038 hc
[2] = cpu_to_be32(0x48000008);
2039 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
2044 static inline int kvmppc_enable_hcall(KVMState
*s
, target_ulong hcall
)
2046 return kvm_vm_enable_cap(s
, KVM_CAP_PPC_ENABLE_HCALL
, 0, hcall
, 1);
2049 void kvmppc_enable_logical_ci_hcalls(void)
2052 * FIXME: it would be nice if we could detect the cases where
2053 * we're using a device which requires the in kernel
2054 * implementation of these hcalls, but the kernel lacks them and
2055 * produce a warning.
2057 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
2058 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
2061 void kvmppc_enable_set_mode_hcall(void)
2063 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
2066 void kvmppc_enable_clear_ref_mod_hcalls(void)
2068 kvmppc_enable_hcall(kvm_state
, H_CLEAR_REF
);
2069 kvmppc_enable_hcall(kvm_state
, H_CLEAR_MOD
);
2072 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2074 CPUState
*cs
= CPU(cpu
);
2077 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2079 error_report("This vCPU type or KVM version does not support PAPR");
2083 /* Update the capability flag so we sync the right information
2088 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
2090 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
2093 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2095 CPUState
*cs
= CPU(cpu
);
2098 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2099 if (ret
&& mpic_proxy
) {
2100 error_report("This KVM version does not support EPR");
2105 int kvmppc_smt_threads(void)
2107 return cap_ppc_smt
? cap_ppc_smt
: 1;
2111 off_t
kvmppc_alloc_rma(void **rma
)
2115 struct kvm_allocate_rma ret
;
2117 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2118 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2119 * not necessary on this hardware
2120 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2122 * FIXME: We should allow the user to force contiguous RMA
2123 * allocation in the cap_ppc_rma==1 case.
2125 if (cap_ppc_rma
< 2) {
2129 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2131 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2136 size
= MIN(ret
.rma_size
, 256ul << 20);
2138 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2139 if (*rma
== MAP_FAILED
) {
2140 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2147 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2149 struct kvm_ppc_smmu_info info
;
2150 long rampagesize
, best_page_shift
;
2153 if (cap_ppc_rma
>= 2) {
2154 return current_size
;
2157 /* Find the largest hardware supported page size that's less than
2158 * or equal to the (logical) backing page size of guest RAM */
2159 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2160 rampagesize
= getrampagesize();
2161 best_page_shift
= 0;
2163 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2164 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2166 if (!sps
->page_shift
) {
2170 if ((sps
->page_shift
> best_page_shift
)
2171 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2172 best_page_shift
= sps
->page_shift
;
2176 return MIN(current_size
,
2177 1ULL << (best_page_shift
+ hash_shift
- 7));
2181 bool kvmppc_spapr_use_multitce(void)
2183 return cap_spapr_multitce
;
2186 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2189 struct kvm_create_spapr_tce args
= {
2191 .window_size
= window_size
,
2197 /* Must set fd to -1 so we don't try to munmap when called for
2198 * destroying the table, which the upper layers -will- do
2201 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2205 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2207 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2212 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2213 /* FIXME: round this up to page size */
2215 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2216 if (table
== MAP_FAILED
) {
2217 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2227 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2235 len
= nb_table
* sizeof(uint64_t);
2236 if ((munmap(table
, len
) < 0) ||
2238 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2240 /* Leak the table */
2246 int kvmppc_reset_htab(int shift_hint
)
2248 uint32_t shift
= shift_hint
;
2250 if (!kvm_enabled()) {
2251 /* Full emulation, tell caller to allocate htab itself */
2254 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2256 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2257 if (ret
== -ENOTTY
) {
2258 /* At least some versions of PR KVM advertise the
2259 * capability, but don't implement the ioctl(). Oops.
2260 * Return 0 so that we allocate the htab in qemu, as is
2261 * correct for PR. */
2263 } else if (ret
< 0) {
2269 /* We have a kernel that predates the htab reset calls. For PR
2270 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2271 * this era, it has allocated a 16MB fixed size hash table
2272 * already. Kernels of this era have the GET_PVINFO capability
2273 * only on PR, so we use this hack to determine the right
2275 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
2276 /* PR - tell caller to allocate htab */
2279 /* HV - assume 16MB kernel allocated htab */
2284 static inline uint32_t mfpvr(void)
2293 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2302 static void kvmppc_host_cpu_initfn(Object
*obj
)
2304 assert(kvm_enabled());
2307 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2309 DeviceClass
*dc
= DEVICE_CLASS(oc
);
2310 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2311 uint32_t vmx
= kvmppc_get_vmx();
2312 uint32_t dfp
= kvmppc_get_dfp();
2313 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2314 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2316 /* Now fix up the class with information we can query from the host */
2320 /* Only override when we know what the host supports */
2321 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2322 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2325 /* Only override when we know what the host supports */
2326 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2329 if (dcache_size
!= -1) {
2330 pcc
->l1_dcache_size
= dcache_size
;
2333 if (icache_size
!= -1) {
2334 pcc
->l1_icache_size
= icache_size
;
2337 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2338 dc
->cannot_destroy_with_object_finalize_yet
= true;
2341 bool kvmppc_has_cap_epr(void)
2346 bool kvmppc_has_cap_htab_fd(void)
2351 bool kvmppc_has_cap_fixup_hcalls(void)
2353 return cap_fixup_hcalls
;
2356 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2358 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2360 while (oc
&& !object_class_is_abstract(oc
)) {
2361 oc
= object_class_get_parent(oc
);
2365 return POWERPC_CPU_CLASS(oc
);
2368 PowerPCCPUClass
*kvm_ppc_get_host_cpu_class(void)
2370 uint32_t host_pvr
= mfpvr();
2371 PowerPCCPUClass
*pvr_pcc
;
2373 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2374 if (pvr_pcc
== NULL
) {
2375 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2381 static int kvm_ppc_register_host_cpu_type(void)
2383 TypeInfo type_info
= {
2384 .name
= TYPE_HOST_POWERPC_CPU
,
2385 .instance_init
= kvmppc_host_cpu_initfn
,
2386 .class_init
= kvmppc_host_cpu_class_init
,
2388 PowerPCCPUClass
*pvr_pcc
;
2391 pvr_pcc
= kvm_ppc_get_host_cpu_class();
2392 if (pvr_pcc
== NULL
) {
2395 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2396 type_register(&type_info
);
2398 /* Register generic family CPU class for a family */
2399 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2400 dc
= DEVICE_CLASS(pvr_pcc
);
2401 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2402 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2403 type_register(&type_info
);
2405 #if defined(TARGET_PPC64)
2406 type_info
.name
= g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE
, "host");
2407 type_info
.parent
= TYPE_SPAPR_CPU_CORE
,
2408 type_info
.instance_size
= sizeof(sPAPRCPUCore
);
2409 type_info
.instance_init
= NULL
;
2410 type_info
.class_init
= spapr_cpu_core_class_init
;
2411 type_info
.class_data
= (void *) "host";
2412 type_register(&type_info
);
2413 g_free((void *)type_info
.name
);
2415 /* Register generic spapr CPU family class for current host CPU type */
2416 type_info
.name
= g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE
, dc
->desc
);
2417 type_info
.class_data
= (void *) dc
->desc
;
2418 type_register(&type_info
);
2419 g_free((void *)type_info
.name
);
2425 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2427 struct kvm_rtas_token_args args
= {
2431 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2435 strncpy(args
.name
, function
, sizeof(args
.name
));
2437 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2440 int kvmppc_get_htab_fd(bool write
)
2442 struct kvm_get_htab_fd s
= {
2443 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2448 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2452 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2455 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2457 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2458 uint8_t buf
[bufsize
];
2462 rc
= read(fd
, buf
, bufsize
);
2464 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2468 uint8_t *buffer
= buf
;
2471 struct kvm_get_htab_header
*head
=
2472 (struct kvm_get_htab_header
*) buffer
;
2473 size_t chunksize
= sizeof(*head
) +
2474 HASH_PTE_SIZE_64
* head
->n_valid
;
2476 qemu_put_be32(f
, head
->index
);
2477 qemu_put_be16(f
, head
->n_valid
);
2478 qemu_put_be16(f
, head
->n_invalid
);
2479 qemu_put_buffer(f
, (void *)(head
+ 1),
2480 HASH_PTE_SIZE_64
* head
->n_valid
);
2482 buffer
+= chunksize
;
2488 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2490 return (rc
== 0) ? 1 : 0;
2493 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2494 uint16_t n_valid
, uint16_t n_invalid
)
2496 struct kvm_get_htab_header
*buf
;
2497 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2500 buf
= alloca(chunksize
);
2502 buf
->n_valid
= n_valid
;
2503 buf
->n_invalid
= n_invalid
;
2505 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2507 rc
= write(fd
, buf
, chunksize
);
2509 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2513 if (rc
!= chunksize
) {
2514 /* We should never get a short write on a single chunk */
2515 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2521 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2526 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2531 int kvm_arch_on_sigbus(int code
, void *addr
)
2536 void kvm_arch_init_irq_routing(KVMState
*s
)
2540 struct kvm_get_htab_buf
{
2541 struct kvm_get_htab_header header
;
2543 * We require one extra byte for read
2545 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2548 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2551 struct kvm_get_htab_fd ghf
;
2552 struct kvm_get_htab_buf
*hpte_buf
;
2555 ghf
.start_index
= pte_index
;
2556 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2561 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2563 * Read the hpte group
2565 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2570 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2579 void kvmppc_hash64_free_pteg(uint64_t token
)
2581 struct kvm_get_htab_buf
*htab_buf
;
2583 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2589 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2590 target_ulong pte0
, target_ulong pte1
)
2593 struct kvm_get_htab_fd ghf
;
2594 struct kvm_get_htab_buf hpte_buf
;
2597 ghf
.start_index
= 0; /* Ignored */
2598 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2603 hpte_buf
.header
.n_valid
= 1;
2604 hpte_buf
.header
.n_invalid
= 0;
2605 hpte_buf
.header
.index
= pte_index
;
2606 hpte_buf
.hpte
[0] = pte0
;
2607 hpte_buf
.hpte
[1] = pte1
;
2609 * Write the hpte entry.
2610 * CAUTION: write() has the warn_unused_result attribute. Hence we
2611 * need to check the return value, even though we do nothing.
2613 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2625 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2626 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
2631 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry
*route
,
2632 int vector
, PCIDevice
*dev
)
2637 int kvm_arch_release_virq_post(int virq
)
/* Extract the GSI number carried in the low 16 bits of MSI data. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return (int)(data & 0xffffu);
}
2647 int kvmppc_enable_hwrng(void)
2649 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2653 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);