2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
54 #define DPRINTF(fmt, ...) \
55 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #define DPRINTF(fmt, ...) \
61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
67 static int cap_interrupt_unset
= false;
68 static int cap_interrupt_level
= false;
69 static int cap_segstate
;
70 static int cap_booke_sregs
;
71 static int cap_ppc_smt
;
72 static int cap_ppc_rma
;
73 static int cap_spapr_tce
;
74 static int cap_spapr_multitce
;
75 static int cap_spapr_vfio
;
77 static int cap_one_reg
;
79 static int cap_ppc_watchdog
;
81 static int cap_htab_fd
;
82 static int cap_fixup_hcalls
;
83 static int cap_htm
; /* Hardware transactional memory support */
85 static uint32_t debug_inst_opcode
;
87 /* XXX We have a race condition where we actually have a level triggered
88 * interrupt, but the infrastructure can't expose that yet, so the guest
89 * takes but ignores it, goes to sleep and never gets notified that there's
90 * still an interrupt pending.
92 * As a quick workaround, let's just wake up again 20 ms after we injected
93 * an interrupt. That way we can assure that we're always reinjecting
94 * interrupts in case the guest swallowed them.
96 static QEMUTimer
*idle_timer
;
98 static void kvm_kick_cpu(void *opaque
)
100 PowerPCCPU
*cpu
= opaque
;
102 qemu_cpu_kick(CPU(cpu
));
105 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
106 * should only be used for fallback tests - generally we should use
107 * explicit capabilities for the features we want, rather than
108 * assuming what is/isn't available depending on the KVM variant. */
109 static bool kvmppc_is_pr(KVMState
*ks
)
111 /* Assume KVM-PR if the GET_PVINFO capability is available */
112 return kvm_check_extension(ks
, KVM_CAP_PPC_GET_PVINFO
) != 0;
115 static int kvm_ppc_register_host_cpu_type(void);
117 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
119 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
120 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
121 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
122 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
123 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
124 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
125 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
126 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
127 cap_spapr_vfio
= false;
128 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
129 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
130 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
131 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
132 /* Note: we don't set cap_papr here, because this capability is
133 * only activated after this by kvmppc_set_papr() */
134 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
135 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
136 cap_htm
= kvm_vm_check_extension(s
, KVM_CAP_PPC_HTM
);
138 if (!cap_interrupt_level
) {
139 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
140 "VM to stall at times!\n");
143 kvm_ppc_register_host_cpu_type();
148 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
150 CPUPPCState
*cenv
= &cpu
->env
;
151 CPUState
*cs
= CPU(cpu
);
152 struct kvm_sregs sregs
;
155 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
156 /* What we're really trying to say is "if we're on BookE, we use
157 the native PVR for now". This is the only sane way to check
158 it though, so we potentially confuse users that they can run
159 BookE guests on BookS. Let's hope nobody dares enough :) */
163 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
168 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
173 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
174 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
177 /* Set up a shared TLB array with KVM */
178 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
180 CPUPPCState
*env
= &cpu
->env
;
181 CPUState
*cs
= CPU(cpu
);
182 struct kvm_book3e_206_tlb_params params
= {};
183 struct kvm_config_tlb cfg
= {};
184 unsigned int entries
= 0;
187 if (!kvm_enabled() ||
188 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
192 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
194 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
195 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
196 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
197 entries
+= params
.tlb_sizes
[i
];
200 assert(entries
== env
->nb_tlb
);
201 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
203 env
->tlb_dirty
= true;
205 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
206 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
207 cfg
.params
= (uintptr_t)¶ms
;
208 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
210 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
212 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
213 __func__
, strerror(-ret
));
217 env
->kvm_sw_tlb
= true;
222 #if defined(TARGET_PPC64)
223 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
224 struct kvm_ppc_smmu_info
*info
)
226 CPUPPCState
*env
= &cpu
->env
;
227 CPUState
*cs
= CPU(cpu
);
229 memset(info
, 0, sizeof(*info
));
231 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
232 * need to "guess" what the supported page sizes are.
234 * For that to work we make a few assumptions:
236 * - Check whether we are running "PR" KVM which only supports 4K
237 * and 16M pages, but supports them regardless of the backing
238 * store characteritics. We also don't support 1T segments.
240 * This is safe as if HV KVM ever supports that capability or PR
241 * KVM grows supports for more page/segment sizes, those versions
242 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
243 * will not hit this fallback
245 * - Else we are running HV KVM. This means we only support page
246 * sizes that fit in the backing store. Additionally we only
247 * advertize 64K pages if the processor is ARCH 2.06 and we assume
248 * P7 encodings for the SLB and hash table. Here too, we assume
249 * support for any newer processor will mean a kernel that
250 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
253 if (kvmppc_is_pr(cs
->kvm_state
)) {
258 /* Standard 4k base page size segment */
259 info
->sps
[0].page_shift
= 12;
260 info
->sps
[0].slb_enc
= 0;
261 info
->sps
[0].enc
[0].page_shift
= 12;
262 info
->sps
[0].enc
[0].pte_enc
= 0;
264 /* Standard 16M large page size segment */
265 info
->sps
[1].page_shift
= 24;
266 info
->sps
[1].slb_enc
= SLB_VSID_L
;
267 info
->sps
[1].enc
[0].page_shift
= 24;
268 info
->sps
[1].enc
[0].pte_enc
= 0;
272 /* HV KVM has backing store size restrictions */
273 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
275 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
276 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
279 if (env
->mmu_model
== POWERPC_MMU_2_06
||
280 env
->mmu_model
== POWERPC_MMU_2_07
) {
286 /* Standard 4k base page size segment */
287 info
->sps
[i
].page_shift
= 12;
288 info
->sps
[i
].slb_enc
= 0;
289 info
->sps
[i
].enc
[0].page_shift
= 12;
290 info
->sps
[i
].enc
[0].pte_enc
= 0;
293 /* 64K on MMU 2.06 and later */
294 if (env
->mmu_model
== POWERPC_MMU_2_06
||
295 env
->mmu_model
== POWERPC_MMU_2_07
) {
296 info
->sps
[i
].page_shift
= 16;
297 info
->sps
[i
].slb_enc
= 0x110;
298 info
->sps
[i
].enc
[0].page_shift
= 16;
299 info
->sps
[i
].enc
[0].pte_enc
= 1;
303 /* Standard 16M large page size segment */
304 info
->sps
[i
].page_shift
= 24;
305 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
306 info
->sps
[i
].enc
[0].page_shift
= 24;
307 info
->sps
[i
].enc
[0].pte_enc
= 0;
311 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
313 CPUState
*cs
= CPU(cpu
);
316 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
317 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
323 kvm_get_fallback_smmu_info(cpu
, info
);
/* Return the page size backing mem_path: the hugepage size when the
 * path lives on hugetlbfs, the normal host page size otherwise.
 * Exits on statfs() failure since we can't size guest RAM without it. */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    /* statfs() can be interrupted; retry on EINTR */
    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
353 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
354 * may or may not name the same files / on the same filesystem now as
355 * when we actually open and map them. Iterate over the file
356 * descriptors instead, and use qemu_fd_getpagesize().
358 static int find_max_supported_pagesize(Object
*obj
, void *opaque
)
361 long *hpsize_min
= opaque
;
363 if (object_dynamic_cast(obj
, TYPE_MEMORY_BACKEND
)) {
364 mem_path
= object_property_get_str(obj
, "mem-path", NULL
);
366 long hpsize
= gethugepagesize(mem_path
);
367 if (hpsize
< *hpsize_min
) {
368 *hpsize_min
= hpsize
;
371 *hpsize_min
= getpagesize();
378 static long getrampagesize(void)
380 long hpsize
= LONG_MAX
;
381 long mainrampagesize
;
385 mainrampagesize
= gethugepagesize(mem_path
);
387 mainrampagesize
= getpagesize();
390 /* it's possible we have memory-backend objects with
391 * hugepage-backed RAM. these may get mapped into system
392 * address space via -numa parameters or memory hotplug
393 * hooks. we want to take these into account, but we
394 * also want to make sure these supported hugepage
395 * sizes are applicable across the entire range of memory
396 * we may boot from, so we take the min across all
397 * backends, and assume normal pages in cases where a
398 * backend isn't backed by hugepages.
400 memdev_root
= object_resolve_path("/objects", NULL
);
402 object_child_foreach(memdev_root
, find_max_supported_pagesize
, &hpsize
);
404 if (hpsize
== LONG_MAX
) {
405 /* No additional memory regions found ==> Report main RAM page size */
406 return mainrampagesize
;
409 /* If NUMA is disabled or the NUMA nodes are not backed with a
410 * memory-backend, then there is at least one node using "normal" RAM,
411 * so if its page size is smaller we have got to report that size instead.
413 if (hpsize
> mainrampagesize
&&
414 (nb_numa_nodes
== 0 || numa_info
[0].node_memdev
== NULL
)) {
417 error_report("Huge page support disabled (n/a for main memory).");
420 return mainrampagesize
;
426 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
428 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
432 return (1ul << shift
) <= rampgsize
;
435 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
437 static struct kvm_ppc_smmu_info smmu_info
;
438 static bool has_smmu_info
;
439 CPUPPCState
*env
= &cpu
->env
;
442 bool has_64k_pages
= false;
444 /* We only handle page sizes for 64-bit server guests for now */
445 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
449 /* Collect MMU info from kernel if not already */
450 if (!has_smmu_info
) {
451 kvm_get_smmu_info(cpu
, &smmu_info
);
452 has_smmu_info
= true;
455 rampagesize
= getrampagesize();
457 /* Convert to QEMU form */
458 memset(&env
->sps
, 0, sizeof(env
->sps
));
460 /* If we have HV KVM, we need to forbid CI large pages if our
461 * host page size is smaller than 64K.
463 if (smmu_info
.flags
& KVM_PPC_PAGE_SIZES_REAL
) {
464 env
->ci_large_pages
= getpagesize() >= 0x10000;
468 * XXX This loop should be an entry wide AND of the capabilities that
469 * the selected CPU has with the capabilities that KVM supports.
471 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
472 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
473 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
475 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
479 qsps
->page_shift
= ksps
->page_shift
;
480 qsps
->slb_enc
= ksps
->slb_enc
;
481 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
482 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
483 ksps
->enc
[jk
].page_shift
)) {
486 if (ksps
->enc
[jk
].page_shift
== 16) {
487 has_64k_pages
= true;
489 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
490 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
491 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
495 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
499 env
->slb_nr
= smmu_info
.slb_size
;
500 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
501 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
503 if (!has_64k_pages
) {
504 env
->mmu_model
&= ~POWERPC_MMU_64K
;
507 #else /* defined (TARGET_PPC64) */
509 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
513 #endif /* !defined (TARGET_PPC64) */
515 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
517 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
520 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
521 * book3s supports only 1 watchpoint, so array size
522 * of 4 is sufficient for now.
524 #define MAX_HW_BKPTS 4
526 static struct HWBreakpoint
{
529 } hw_debug_points
[MAX_HW_BKPTS
];
531 static CPUWatchpoint hw_watchpoint
;
533 /* Default there is no breakpoint and watchpoint supported */
534 static int max_hw_breakpoint
;
535 static int max_hw_watchpoint
;
536 static int nb_hw_breakpoint
;
537 static int nb_hw_watchpoint
;
539 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
541 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
542 max_hw_breakpoint
= 2;
543 max_hw_watchpoint
= 2;
546 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
547 fprintf(stderr
, "Error initializing h/w breakpoints\n");
552 int kvm_arch_init_vcpu(CPUState
*cs
)
554 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
555 CPUPPCState
*cenv
= &cpu
->env
;
558 /* Gather server mmu info from KVM and update the CPU state */
559 kvm_fixup_page_sizes(cpu
);
561 /* Synchronize sregs with kvm */
562 ret
= kvm_arch_sync_sregs(cpu
);
564 if (ret
== -EINVAL
) {
565 error_report("Register sync failed... If you're using kvm-hv.ko,"
566 " only \"-cpu host\" is possible");
571 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
573 switch (cenv
->mmu_model
) {
574 case POWERPC_MMU_BOOKE206
:
575 /* This target supports access to KVM's guest TLB */
576 ret
= kvm_booke206_tlb_init(cpu
);
578 case POWERPC_MMU_2_07
:
579 if (!cap_htm
&& !kvmppc_is_pr(cs
->kvm_state
)) {
580 /* KVM-HV has transactional memory on POWER8 also without the
581 * KVM_CAP_PPC_HTM extension, so enable it here instead. */
589 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
590 kvmppc_hw_debug_points_init(cenv
);
595 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
597 CPUPPCState
*env
= &cpu
->env
;
598 CPUState
*cs
= CPU(cpu
);
599 struct kvm_dirty_tlb dirty_tlb
;
600 unsigned char *bitmap
;
603 if (!env
->kvm_sw_tlb
) {
607 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
608 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
610 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
611 dirty_tlb
.num_dirty
= env
->nb_tlb
;
613 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
615 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
616 __func__
, strerror(-ret
));
622 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
624 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
625 CPUPPCState
*env
= &cpu
->env
;
630 struct kvm_one_reg reg
= {
632 .addr
= (uintptr_t) &val
,
636 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
638 trace_kvm_failed_spr_get(spr
, strerror(errno
));
640 switch (id
& KVM_REG_SIZE_MASK
) {
641 case KVM_REG_SIZE_U32
:
642 env
->spr
[spr
] = val
.u32
;
645 case KVM_REG_SIZE_U64
:
646 env
->spr
[spr
] = val
.u64
;
650 /* Don't handle this size yet */
656 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
658 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
659 CPUPPCState
*env
= &cpu
->env
;
664 struct kvm_one_reg reg
= {
666 .addr
= (uintptr_t) &val
,
670 switch (id
& KVM_REG_SIZE_MASK
) {
671 case KVM_REG_SIZE_U32
:
672 val
.u32
= env
->spr
[spr
];
675 case KVM_REG_SIZE_U64
:
676 val
.u64
= env
->spr
[spr
];
680 /* Don't handle this size yet */
684 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
686 trace_kvm_failed_spr_set(spr
, strerror(errno
));
690 static int kvm_put_fp(CPUState
*cs
)
692 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
693 CPUPPCState
*env
= &cpu
->env
;
694 struct kvm_one_reg reg
;
698 if (env
->insns_flags
& PPC_FLOAT
) {
699 uint64_t fpscr
= env
->fpscr
;
700 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
702 reg
.id
= KVM_REG_PPC_FPSCR
;
703 reg
.addr
= (uintptr_t)&fpscr
;
704 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
706 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
710 for (i
= 0; i
< 32; i
++) {
713 #ifdef HOST_WORDS_BIGENDIAN
714 vsr
[0] = float64_val(env
->fpr
[i
]);
715 vsr
[1] = env
->vsr
[i
];
717 vsr
[0] = env
->vsr
[i
];
718 vsr
[1] = float64_val(env
->fpr
[i
]);
720 reg
.addr
= (uintptr_t) &vsr
;
721 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
723 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
725 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
732 if (env
->insns_flags
& PPC_ALTIVEC
) {
733 reg
.id
= KVM_REG_PPC_VSCR
;
734 reg
.addr
= (uintptr_t)&env
->vscr
;
735 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
737 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
741 for (i
= 0; i
< 32; i
++) {
742 reg
.id
= KVM_REG_PPC_VR(i
);
743 reg
.addr
= (uintptr_t)&env
->avr
[i
];
744 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
746 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
755 static int kvm_get_fp(CPUState
*cs
)
757 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
758 CPUPPCState
*env
= &cpu
->env
;
759 struct kvm_one_reg reg
;
763 if (env
->insns_flags
& PPC_FLOAT
) {
765 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
767 reg
.id
= KVM_REG_PPC_FPSCR
;
768 reg
.addr
= (uintptr_t)&fpscr
;
769 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
771 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
777 for (i
= 0; i
< 32; i
++) {
780 reg
.addr
= (uintptr_t) &vsr
;
781 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
783 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
785 DPRINTF("Unable to get %s%d from KVM: %s\n",
786 vsx
? "VSR" : "FPR", i
, strerror(errno
));
789 #ifdef HOST_WORDS_BIGENDIAN
790 env
->fpr
[i
] = vsr
[0];
792 env
->vsr
[i
] = vsr
[1];
795 env
->fpr
[i
] = vsr
[1];
797 env
->vsr
[i
] = vsr
[0];
804 if (env
->insns_flags
& PPC_ALTIVEC
) {
805 reg
.id
= KVM_REG_PPC_VSCR
;
806 reg
.addr
= (uintptr_t)&env
->vscr
;
807 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
809 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
813 for (i
= 0; i
< 32; i
++) {
814 reg
.id
= KVM_REG_PPC_VR(i
);
815 reg
.addr
= (uintptr_t)&env
->avr
[i
];
816 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
818 DPRINTF("Unable to get VR%d from KVM: %s\n",
828 #if defined(TARGET_PPC64)
829 static int kvm_get_vpa(CPUState
*cs
)
831 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
832 CPUPPCState
*env
= &cpu
->env
;
833 struct kvm_one_reg reg
;
836 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
837 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
838 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
840 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
844 assert((uintptr_t)&env
->slb_shadow_size
845 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
846 reg
.id
= KVM_REG_PPC_VPA_SLB
;
847 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
848 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
850 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
855 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
856 reg
.id
= KVM_REG_PPC_VPA_DTL
;
857 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
858 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
860 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
868 static int kvm_put_vpa(CPUState
*cs
)
870 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
871 CPUPPCState
*env
= &cpu
->env
;
872 struct kvm_one_reg reg
;
875 /* SLB shadow or DTL can't be registered unless a master VPA is
876 * registered. That means when restoring state, if a VPA *is*
877 * registered, we need to set that up first. If not, we need to
878 * deregister the others before deregistering the master VPA */
879 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
882 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
883 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
884 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
886 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
891 assert((uintptr_t)&env
->slb_shadow_size
892 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
893 reg
.id
= KVM_REG_PPC_VPA_SLB
;
894 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
895 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
897 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
901 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
902 reg
.id
= KVM_REG_PPC_VPA_DTL
;
903 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
904 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
906 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
911 if (!env
->vpa_addr
) {
912 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
913 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
914 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
916 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
923 #endif /* TARGET_PPC64 */
925 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
927 CPUPPCState
*env
= &cpu
->env
;
928 struct kvm_sregs sregs
;
931 sregs
.pvr
= env
->spr
[SPR_PVR
];
933 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
937 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
938 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
939 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
940 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
942 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
947 for (i
= 0; i
< 16; i
++) {
948 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
952 for (i
= 0; i
< 8; i
++) {
953 /* Beware. We have to swap upper and lower bits here */
954 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
956 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
960 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
963 int kvm_arch_put_registers(CPUState
*cs
, int level
)
965 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
966 CPUPPCState
*env
= &cpu
->env
;
967 struct kvm_regs regs
;
971 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
978 regs
.xer
= cpu_read_xer(env
);
982 regs
.srr0
= env
->spr
[SPR_SRR0
];
983 regs
.srr1
= env
->spr
[SPR_SRR1
];
985 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
986 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
987 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
988 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
989 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
990 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
991 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
992 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
994 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
996 for (i
= 0;i
< 32; i
++)
997 regs
.gpr
[i
] = env
->gpr
[i
];
1000 for (i
= 0; i
< 8; i
++) {
1001 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
1004 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
1010 if (env
->tlb_dirty
) {
1011 kvm_sw_tlb_put(cpu
);
1012 env
->tlb_dirty
= false;
1015 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
1016 ret
= kvmppc_put_books_sregs(cpu
);
1022 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
1023 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1029 /* We deliberately ignore errors here, for kernels which have
1030 * the ONE_REG calls, but don't support the specific
1031 * registers, there's a reasonable chance things will still
1032 * work, at least until we try to migrate. */
1033 for (i
= 0; i
< 1024; i
++) {
1034 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1037 kvm_put_one_spr(cs
, id
, i
);
1043 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1044 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1046 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1047 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1049 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1050 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1051 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1052 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1053 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1054 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1055 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1056 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1057 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1058 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1062 if (kvm_put_vpa(cs
) < 0) {
1063 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1067 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1068 #endif /* TARGET_PPC64 */
1074 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1076 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1079 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1081 CPUPPCState
*env
= &cpu
->env
;
1082 struct kvm_sregs sregs
;
1085 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1090 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1091 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1092 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1093 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1094 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1095 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1096 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1097 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1098 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1099 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1100 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1101 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1104 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1105 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1106 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1107 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1108 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1109 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1112 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1113 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1116 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1117 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1120 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1121 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1122 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1123 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1124 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1125 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1126 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1127 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1128 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1129 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1130 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1131 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1132 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1133 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1134 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1135 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1136 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1137 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1138 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1139 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1140 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1141 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1142 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1143 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1144 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1145 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1146 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1147 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1148 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1149 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1150 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1151 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1152 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1154 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1155 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1156 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1157 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1158 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1159 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1160 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1163 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1164 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1165 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1168 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1169 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1170 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1171 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1172 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1176 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1177 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1178 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1179 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1180 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1181 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1182 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1183 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1184 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1185 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1186 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1189 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1190 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1193 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1194 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1195 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1198 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1199 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1200 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1201 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1203 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1204 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1205 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1212 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1214 CPUPPCState
*env
= &cpu
->env
;
1215 struct kvm_sregs sregs
;
1219 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1224 if (!env
->external_htab
) {
1225 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1231 * The packed SLB array we get from KVM_GET_SREGS only contains
1232 * information about valid entries. So we flush our internal copy
1233 * to get rid of stale ones, then put all valid SLB entries back
1236 memset(env
->slb
, 0, sizeof(env
->slb
));
1237 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1238 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1239 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1241 * Only restore valid entries
1243 if (rb
& SLB_ESID_V
) {
1244 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1250 for (i
= 0; i
< 16; i
++) {
1251 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1255 for (i
= 0; i
< 8; i
++) {
1256 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1257 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1258 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1259 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1265 int kvm_arch_get_registers(CPUState
*cs
)
1267 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1268 CPUPPCState
*env
= &cpu
->env
;
1269 struct kvm_regs regs
;
1273 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1278 for (i
= 7; i
>= 0; i
--) {
1279 env
->crf
[i
] = cr
& 15;
1283 env
->ctr
= regs
.ctr
;
1285 cpu_write_xer(env
, regs
.xer
);
1286 env
->msr
= regs
.msr
;
1289 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1290 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1292 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1293 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1294 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1295 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1296 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1297 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1298 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1299 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1301 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1303 for (i
= 0;i
< 32; i
++)
1304 env
->gpr
[i
] = regs
.gpr
[i
];
1308 if (cap_booke_sregs
) {
1309 ret
= kvmppc_get_booke_sregs(cpu
);
1316 ret
= kvmppc_get_books_sregs(cpu
);
1323 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1329 /* We deliberately ignore errors here, for kernels which have
1330 * the ONE_REG calls, but don't support the specific
1331 * registers, there's a reasonable chance things will still
1332 * work, at least until we try to migrate. */
1333 for (i
= 0; i
< 1024; i
++) {
1334 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1337 kvm_get_one_spr(cs
, id
, i
);
1343 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1344 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1346 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1347 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1349 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1350 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1351 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1352 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1353 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1354 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1355 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1356 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1357 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1358 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1362 if (kvm_get_vpa(cs
) < 0) {
1363 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1367 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1374 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1376 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1378 if (irq
!= PPC_INTERRUPT_EXT
) {
1382 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1386 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1391 #if defined(TARGET_PPCEMB)
1392 #define PPC_INPUT_INT PPC40x_INPUT_INT
1393 #elif defined(TARGET_PPC64)
1394 #define PPC_INPUT_INT PPC970_INPUT_INT
1396 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1399 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1401 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1402 CPUPPCState
*env
= &cpu
->env
;
1406 qemu_mutex_lock_iothread();
1408 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1409 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1410 if (!cap_interrupt_level
&&
1411 run
->ready_for_interrupt_injection
&&
1412 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1413 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1415 /* For now KVM disregards the 'irq' argument. However, in the
1416 * future KVM could cache it in-kernel to avoid a heavyweight exit
1417 * when reading the UIC.
1419 irq
= KVM_INTERRUPT_SET
;
1421 DPRINTF("injected interrupt %d\n", irq
);
1422 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1424 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1427 /* Always wake up soon in case the interrupt was level based */
1428 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1429 (NANOSECONDS_PER_SECOND
/ 50));
1432 /* We don't know if there are more interrupts pending after this. However,
1433 * the guest will return to userspace in the course of handling this one
1434 * anyways, so we will get a chance to deliver the rest. */
1436 qemu_mutex_unlock_iothread();
1439 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1441 return MEMTXATTRS_UNSPECIFIED
;
1444 int kvm_arch_process_async_events(CPUState
*cs
)
1449 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1451 CPUState
*cs
= CPU(cpu
);
1452 CPUPPCState
*env
= &cpu
->env
;
1454 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1456 cs
->exception_index
= EXCP_HLT
;
1462 /* map dcr access to existing qemu dcr emulation */
1463 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1465 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1466 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1471 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1473 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1474 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1479 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1481 /* Mixed endian case is not handled */
1482 uint32_t sc
= debug_inst_opcode
;
1484 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1486 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1493 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1497 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1498 sc
!= debug_inst_opcode
||
1499 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1507 static int find_hw_breakpoint(target_ulong addr
, int type
)
1511 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1512 <= ARRAY_SIZE(hw_debug_points
));
1514 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1515 if (hw_debug_points
[n
].addr
== addr
&&
1516 hw_debug_points
[n
].type
== type
) {
1524 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1528 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1530 *flag
= BP_MEM_ACCESS
;
1534 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1536 *flag
= BP_MEM_WRITE
;
1540 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1542 *flag
= BP_MEM_READ
;
1549 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1550 target_ulong len
, int type
)
1552 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1556 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1557 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1560 case GDB_BREAKPOINT_HW
:
1561 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1565 if (find_hw_breakpoint(addr
, type
) >= 0) {
1572 case GDB_WATCHPOINT_WRITE
:
1573 case GDB_WATCHPOINT_READ
:
1574 case GDB_WATCHPOINT_ACCESS
:
1575 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1579 if (find_hw_breakpoint(addr
, type
) >= 0) {
1593 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1594 target_ulong len
, int type
)
1598 n
= find_hw_breakpoint(addr
, type
);
1604 case GDB_BREAKPOINT_HW
:
1608 case GDB_WATCHPOINT_WRITE
:
1609 case GDB_WATCHPOINT_READ
:
1610 case GDB_WATCHPOINT_ACCESS
:
1617 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1622 void kvm_arch_remove_all_hw_breakpoints(void)
1624 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1627 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1631 /* Software Breakpoint updates */
1632 if (kvm_sw_breakpoints_active(cs
)) {
1633 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1636 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1637 <= ARRAY_SIZE(hw_debug_points
));
1638 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1640 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1641 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1642 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1643 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1644 switch (hw_debug_points
[n
].type
) {
1645 case GDB_BREAKPOINT_HW
:
1646 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1648 case GDB_WATCHPOINT_WRITE
:
1649 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1651 case GDB_WATCHPOINT_READ
:
1652 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1654 case GDB_WATCHPOINT_ACCESS
:
1655 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1656 KVMPPC_DEBUG_WATCH_READ
;
1659 cpu_abort(cs
, "Unsupported breakpoint type\n");
1661 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1666 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1668 CPUState
*cs
= CPU(cpu
);
1669 CPUPPCState
*env
= &cpu
->env
;
1670 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1675 if (cs
->singlestep_enabled
) {
1677 } else if (arch_info
->status
) {
1678 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1679 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1680 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1684 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1685 KVMPPC_DEBUG_WATCH_WRITE
)) {
1686 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1689 cs
->watchpoint_hit
= &hw_watchpoint
;
1690 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1691 hw_watchpoint
.flags
= flag
;
1695 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1698 /* QEMU is not able to handle debug exception, so inject
1699 * program exception to guest;
1700 * Yes program exception NOT debug exception !!
1701 * When QEMU is using debug resources then debug exception must
1702 * be always set. To achieve this we set MSR_DE and also set
1703 * MSRP_DEP so guest cannot change MSR_DE.
1704 * When emulating debug resource for guest we want guest
1705 * to control MSR_DE (enable/disable debug interrupt on need).
1706 * Supporting both configurations are NOT possible.
1707 * So the result is that we cannot share debug resources
1708 * between QEMU and Guest on BOOKE architecture.
1709 * In the current design QEMU gets the priority over guest,
1710 * this means that if QEMU is using debug resources then guest
1712 * For software breakpoint QEMU uses a privileged instruction;
1713 * So there cannot be any reason that we are here for guest
1714 * set debug exception, only possibility is guest executed a
1715 * privileged / illegal instruction and that's why we are
1716 * injecting a program interrupt.
1719 cpu_synchronize_state(cs
);
1720 /* env->nip is PC, so increment this by 4 to use
1721 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1724 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1725 env
->error_code
= POWERPC_EXCP_INVAL
;
1726 ppc_cpu_do_interrupt(cs
);
1732 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1734 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1735 CPUPPCState
*env
= &cpu
->env
;
1738 qemu_mutex_lock_iothread();
1740 switch (run
->exit_reason
) {
1742 if (run
->dcr
.is_write
) {
1743 DPRINTF("handle dcr write\n");
1744 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1746 DPRINTF("handle dcr read\n");
1747 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1751 DPRINTF("handle halt\n");
1752 ret
= kvmppc_handle_halt(cpu
);
1754 #if defined(TARGET_PPC64)
1755 case KVM_EXIT_PAPR_HCALL
:
1756 DPRINTF("handle PAPR hypercall\n");
1757 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1759 run
->papr_hcall
.args
);
1764 DPRINTF("handle epr\n");
1765 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1768 case KVM_EXIT_WATCHDOG
:
1769 DPRINTF("handle watchdog expiry\n");
1770 watchdog_perform_action();
1774 case KVM_EXIT_DEBUG
:
1775 DPRINTF("handle debug exception\n");
1776 if (kvm_handle_debug(cpu
, run
)) {
1780 /* re-enter, this exception was guest-internal */
1785 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1790 qemu_mutex_unlock_iothread();
1794 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1796 CPUState
*cs
= CPU(cpu
);
1797 uint32_t bits
= tsr_bits
;
1798 struct kvm_one_reg reg
= {
1799 .id
= KVM_REG_PPC_OR_TSR
,
1800 .addr
= (uintptr_t) &bits
,
1803 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1806 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1809 CPUState
*cs
= CPU(cpu
);
1810 uint32_t bits
= tsr_bits
;
1811 struct kvm_one_reg reg
= {
1812 .id
= KVM_REG_PPC_CLEAR_TSR
,
1813 .addr
= (uintptr_t) &bits
,
1816 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1819 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1821 CPUState
*cs
= CPU(cpu
);
1822 CPUPPCState
*env
= &cpu
->env
;
1823 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1825 struct kvm_one_reg reg
= {
1826 .id
= KVM_REG_PPC_TCR
,
1827 .addr
= (uintptr_t) &tcr
,
1830 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1833 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1835 CPUState
*cs
= CPU(cpu
);
1838 if (!kvm_enabled()) {
1842 if (!cap_ppc_watchdog
) {
1843 printf("warning: KVM does not support watchdog");
1847 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1849 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1850 __func__
, strerror(-ret
));
1857 static int read_cpuinfo(const char *field
, char *value
, int len
)
1861 int field_len
= strlen(field
);
1864 f
= fopen("/proc/cpuinfo", "r");
1870 if (!fgets(line
, sizeof(line
), f
)) {
1873 if (!strncmp(line
, field
, field_len
)) {
1874 pstrcpy(value
, len
, line
);
1885 uint32_t kvmppc_get_tbfreq(void)
1889 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1891 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1895 if (!(ns
= strchr(line
, ':'))) {
1904 bool kvmppc_get_host_serial(char **value
)
1906 return g_file_get_contents("/proc/device-tree/system-id", value
, NULL
,
1910 bool kvmppc_get_host_model(char **value
)
1912 return g_file_get_contents("/proc/device-tree/model", value
, NULL
, NULL
);
1915 /* Try to find a device tree node for a CPU with clock-frequency property */
1916 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1918 struct dirent
*dirp
;
1921 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1922 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1927 while ((dirp
= readdir(dp
)) != NULL
) {
1929 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1931 f
= fopen(buf
, "r");
1933 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1940 if (buf
[0] == '\0') {
1941 printf("Unknown host!\n");
1948 static uint64_t kvmppc_read_int_dt(const char *filename
)
1957 f
= fopen(filename
, "rb");
1962 len
= fread(&u
, 1, sizeof(u
), f
);
1966 /* property is a 32-bit quantity */
1967 return be32_to_cpu(u
.v32
);
1969 return be64_to_cpu(u
.v64
);
1975 /* Read a CPU node property from the host device tree that's a single
1976 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1977 * (can't find or open the property, or doesn't understand the
1979 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1981 char buf
[PATH_MAX
], *tmp
;
1984 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1988 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1989 val
= kvmppc_read_int_dt(tmp
);
1995 uint64_t kvmppc_get_clockfreq(void)
1997 return kvmppc_read_int_cpu_dt("clock-frequency");
2000 uint32_t kvmppc_get_vmx(void)
2002 return kvmppc_read_int_cpu_dt("ibm,vmx");
2005 uint32_t kvmppc_get_dfp(void)
2007 return kvmppc_read_int_cpu_dt("ibm,dfp");
2010 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
2012 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
2013 CPUState
*cs
= CPU(cpu
);
2015 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
2016 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
2023 int kvmppc_get_hasidle(CPUPPCState
*env
)
2025 struct kvm_ppc_pvinfo pvinfo
;
2027 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
2028 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
2035 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
2037 uint32_t *hc
= (uint32_t*)buf
;
2038 struct kvm_ppc_pvinfo pvinfo
;
2040 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
2041 memcpy(buf
, pvinfo
.hcall
, buf_len
);
2046 * Fallback to always fail hypercalls regardless of endianness:
2048 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2050 * b .+8 (becomes nop in wrong endian)
2051 * bswap32(li r3, -1)
2054 hc
[0] = cpu_to_be32(0x08000048);
2055 hc
[1] = cpu_to_be32(0x3860ffff);
2056 hc
[2] = cpu_to_be32(0x48000008);
2057 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
2062 static inline int kvmppc_enable_hcall(KVMState
*s
, target_ulong hcall
)
2064 return kvm_vm_enable_cap(s
, KVM_CAP_PPC_ENABLE_HCALL
, 0, hcall
, 1);
2067 void kvmppc_enable_logical_ci_hcalls(void)
2070 * FIXME: it would be nice if we could detect the cases where
2071 * we're using a device which requires the in kernel
2072 * implementation of these hcalls, but the kernel lacks them and
2073 * produce a warning.
2075 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
2076 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
2079 void kvmppc_enable_set_mode_hcall(void)
2081 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
2084 void kvmppc_enable_clear_ref_mod_hcalls(void)
2086 kvmppc_enable_hcall(kvm_state
, H_CLEAR_REF
);
2087 kvmppc_enable_hcall(kvm_state
, H_CLEAR_MOD
);
2090 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2092 CPUState
*cs
= CPU(cpu
);
2095 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2097 error_report("This vCPU type or KVM version does not support PAPR");
2101 /* Update the capability flag so we sync the right information
2106 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
2108 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
2111 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2113 CPUState
*cs
= CPU(cpu
);
2116 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2117 if (ret
&& mpic_proxy
) {
2118 error_report("This KVM version does not support EPR");
2123 int kvmppc_smt_threads(void)
2125 return cap_ppc_smt
? cap_ppc_smt
: 1;
2129 off_t
kvmppc_alloc_rma(void **rma
)
2133 struct kvm_allocate_rma ret
;
2135 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2136 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2137 * not necessary on this hardware
2138 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2140 * FIXME: We should allow the user to force contiguous RMA
2141 * allocation in the cap_ppc_rma==1 case.
2143 if (cap_ppc_rma
< 2) {
2147 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2149 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2154 size
= MIN(ret
.rma_size
, 256ul << 20);
2156 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2157 if (*rma
== MAP_FAILED
) {
2158 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2165 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2167 struct kvm_ppc_smmu_info info
;
2168 long rampagesize
, best_page_shift
;
2171 if (cap_ppc_rma
>= 2) {
2172 return current_size
;
2175 /* Find the largest hardware supported page size that's less than
2176 * or equal to the (logical) backing page size of guest RAM */
2177 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2178 rampagesize
= getrampagesize();
2179 best_page_shift
= 0;
2181 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2182 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2184 if (!sps
->page_shift
) {
2188 if ((sps
->page_shift
> best_page_shift
)
2189 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2190 best_page_shift
= sps
->page_shift
;
2194 return MIN(current_size
,
2195 1ULL << (best_page_shift
+ hash_shift
- 7));
2199 bool kvmppc_spapr_use_multitce(void)
2201 return cap_spapr_multitce
;
2204 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2207 struct kvm_create_spapr_tce args
= {
2209 .window_size
= window_size
,
2215 /* Must set fd to -1 so we don't try to munmap when called for
2216 * destroying the table, which the upper layers -will- do
2219 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2223 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2225 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2230 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2231 /* FIXME: round this up to page size */
2233 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2234 if (table
== MAP_FAILED
) {
2235 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2245 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2253 len
= nb_table
* sizeof(uint64_t);
2254 if ((munmap(table
, len
) < 0) ||
2256 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2258 /* Leak the table */
2264 int kvmppc_reset_htab(int shift_hint
)
2266 uint32_t shift
= shift_hint
;
2268 if (!kvm_enabled()) {
2269 /* Full emulation, tell caller to allocate htab itself */
2272 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2274 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2275 if (ret
== -ENOTTY
) {
2276 /* At least some versions of PR KVM advertise the
2277 * capability, but don't implement the ioctl(). Oops.
2278 * Return 0 so that we allocate the htab in qemu, as is
2279 * correct for PR. */
2281 } else if (ret
< 0) {
2287 /* We have a kernel that predates the htab reset calls. For PR
2288 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2289 * this era, it has allocated a 16MB fixed size hash table already. */
2290 if (kvmppc_is_pr(kvm_state
)) {
2291 /* PR - tell caller to allocate htab */
2294 /* HV - assume 16MB kernel allocated htab */
2299 static inline uint32_t mfpvr(void)
2308 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2317 static void kvmppc_host_cpu_initfn(Object
*obj
)
2319 assert(kvm_enabled());
2322 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2324 DeviceClass
*dc
= DEVICE_CLASS(oc
);
2325 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2326 uint32_t vmx
= kvmppc_get_vmx();
2327 uint32_t dfp
= kvmppc_get_dfp();
2328 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2329 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2331 /* Now fix up the class with information we can query from the host */
2335 /* Only override when we know what the host supports */
2336 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2337 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2340 /* Only override when we know what the host supports */
2341 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2344 if (dcache_size
!= -1) {
2345 pcc
->l1_dcache_size
= dcache_size
;
2348 if (icache_size
!= -1) {
2349 pcc
->l1_icache_size
= icache_size
;
2352 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2353 dc
->cannot_destroy_with_object_finalize_yet
= true;
2356 bool kvmppc_has_cap_epr(void)
2361 bool kvmppc_has_cap_htab_fd(void)
2366 bool kvmppc_has_cap_fixup_hcalls(void)
2368 return cap_fixup_hcalls
;
2371 bool kvmppc_has_cap_htm(void)
2376 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2378 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2380 while (oc
&& !object_class_is_abstract(oc
)) {
2381 oc
= object_class_get_parent(oc
);
2385 return POWERPC_CPU_CLASS(oc
);
2388 PowerPCCPUClass
*kvm_ppc_get_host_cpu_class(void)
2390 uint32_t host_pvr
= mfpvr();
2391 PowerPCCPUClass
*pvr_pcc
;
2393 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2394 if (pvr_pcc
== NULL
) {
2395 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2401 static int kvm_ppc_register_host_cpu_type(void)
2403 TypeInfo type_info
= {
2404 .name
= TYPE_HOST_POWERPC_CPU
,
2405 .instance_init
= kvmppc_host_cpu_initfn
,
2406 .class_init
= kvmppc_host_cpu_class_init
,
2408 PowerPCCPUClass
*pvr_pcc
;
2411 pvr_pcc
= kvm_ppc_get_host_cpu_class();
2412 if (pvr_pcc
== NULL
) {
2415 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2416 type_register(&type_info
);
2418 /* Register generic family CPU class for a family */
2419 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2420 dc
= DEVICE_CLASS(pvr_pcc
);
2421 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2422 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2423 type_register(&type_info
);
2425 #if defined(TARGET_PPC64)
2426 type_info
.name
= g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE
, "host");
2427 type_info
.parent
= TYPE_SPAPR_CPU_CORE
,
2428 type_info
.instance_size
= sizeof(sPAPRCPUCore
);
2429 type_info
.instance_init
= NULL
;
2430 type_info
.class_init
= spapr_cpu_core_class_init
;
2431 type_info
.class_data
= (void *) "host";
2432 type_register(&type_info
);
2433 g_free((void *)type_info
.name
);
2435 /* Register generic spapr CPU family class for current host CPU type */
2436 type_info
.name
= g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE
, dc
->desc
);
2437 type_info
.class_data
= (void *) dc
->desc
;
2438 type_register(&type_info
);
2439 g_free((void *)type_info
.name
);
2445 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2447 struct kvm_rtas_token_args args
= {
2451 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2455 strncpy(args
.name
, function
, sizeof(args
.name
));
2457 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2460 int kvmppc_get_htab_fd(bool write
)
2462 struct kvm_get_htab_fd s
= {
2463 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2468 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2472 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2475 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2477 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2478 uint8_t buf
[bufsize
];
2482 rc
= read(fd
, buf
, bufsize
);
2484 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2488 uint8_t *buffer
= buf
;
2491 struct kvm_get_htab_header
*head
=
2492 (struct kvm_get_htab_header
*) buffer
;
2493 size_t chunksize
= sizeof(*head
) +
2494 HASH_PTE_SIZE_64
* head
->n_valid
;
2496 qemu_put_be32(f
, head
->index
);
2497 qemu_put_be16(f
, head
->n_valid
);
2498 qemu_put_be16(f
, head
->n_invalid
);
2499 qemu_put_buffer(f
, (void *)(head
+ 1),
2500 HASH_PTE_SIZE_64
* head
->n_valid
);
2502 buffer
+= chunksize
;
2508 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2510 return (rc
== 0) ? 1 : 0;
2513 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2514 uint16_t n_valid
, uint16_t n_invalid
)
2516 struct kvm_get_htab_header
*buf
;
2517 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2520 buf
= alloca(chunksize
);
2522 buf
->n_valid
= n_valid
;
2523 buf
->n_invalid
= n_invalid
;
2525 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2527 rc
= write(fd
, buf
, chunksize
);
2529 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2533 if (rc
!= chunksize
) {
2534 /* We should never get a short write on a single chunk */
2535 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2541 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2546 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2551 int kvm_arch_on_sigbus(int code
, void *addr
)
2556 void kvm_arch_init_irq_routing(KVMState
*s
)
2560 struct kvm_get_htab_buf
{
2561 struct kvm_get_htab_header header
;
2563 * We require one extra byte for read
2565 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2568 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2571 struct kvm_get_htab_fd ghf
;
2572 struct kvm_get_htab_buf
*hpte_buf
;
2575 ghf
.start_index
= pte_index
;
2576 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2581 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2583 * Read the hpte group
2585 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2590 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2599 void kvmppc_hash64_free_pteg(uint64_t token
)
2601 struct kvm_get_htab_buf
*htab_buf
;
2603 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2609 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2610 target_ulong pte0
, target_ulong pte1
)
2613 struct kvm_get_htab_fd ghf
;
2614 struct kvm_get_htab_buf hpte_buf
;
2617 ghf
.start_index
= 0; /* Ignored */
2618 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2623 hpte_buf
.header
.n_valid
= 1;
2624 hpte_buf
.header
.n_invalid
= 0;
2625 hpte_buf
.header
.index
= pte_index
;
2626 hpte_buf
.hpte
[0] = pte0
;
2627 hpte_buf
.hpte
[1] = pte1
;
2629 * Write the hpte entry.
2630 * CAUTION: write() has the warn_unused_result attribute. Hence we
2631 * need to check the return value, even though we do nothing.
2633 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2645 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2646 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
2651 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry
*route
,
2652 int vector
, PCIDevice
*dev
)
2657 int kvm_arch_release_virq_post(int virq
)
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    /* The GSI lives in the low 16 bits of the MSI data word. */
    return (int)(data & 0xffffu);
}
2667 int kvmppc_enable_hwrng(void)
2669 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2673 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);