/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "cpu-models.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "sysemu/numa.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#if defined(TARGET_PPC64)
#include "hw/ppc/spapr_cpu_core.h"
#endif

/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */

static uint32_t debug_inst_opcode;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

/* Check whether we are running with KVM-PR (instead of KVM-HV). This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant. */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(void);
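/*
 * Note on the cap_* globals above: kvm_arch_init() below probes each
 * KVM capability exactly once, at accelerator setup, and the rest of
 * this file only consults the cached integers. A rough sketch of the
 * pattern (kvm_check_extension() is the real QEMU helper; KVM_CAP_FOO
 * is a placeholder name for illustration):
 *
 *     cap_foo = kvm_check_extension(s, KVM_CAP_FOO);
 *     ...
 *     if (cap_foo) {
 *         // safe to use the ioctl guarded by KVM_CAP_FOO
 *     }
 *
 * Probing once avoids repeated ioctls on hot paths such as register
 * synchronization.
 */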
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}

int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users into thinking they
           can run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
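/*
 * With KVM_CAP_SW_TLB enabled as above, QEMU's own TLB array
 * (env->tlb.tlbm) doubles as the shared backing store: KVM reads and
 * writes the very same entries, so no copy is needed on the fault
 * path. The one obligation on QEMU's side is to flag modified entries
 * (env->tlb_dirty) and push them back with KVM_DIRTY_TLB before the
 * next KVM_RUN - see kvm_sw_tlb_put() below. This description is
 * inferred from how the flags are used in this file, not from separate
 * kernel documentation.
 */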
#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvmppc_is_pr(cs->kvm_state)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}
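/*
 * In other words: prefer the kernel's own report via the
 * KVM_PPC_GET_SMMU_INFO vm ioctl, and only use the guesswork above on
 * kernels that predate it. Callers therefore always receive a fully
 * populated kvm_ppc_smmu_info either way.
 */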
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}

/*
 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 * may or may not name the same files / on the same filesystem now as
 * when we actually open and map them.  Iterate over the file
 * descriptors instead, and use qemu_fd_getpagesize().
 */
static int find_max_supported_pagesize(Object *obj, void *opaque)
{
    char *mem_path;
    long *hpsize_min = opaque;

    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        mem_path = object_property_get_str(obj, "mem-path", NULL);
        if (mem_path) {
            long hpsize = gethugepagesize(mem_path);
            if (hpsize < *hpsize_min) {
                *hpsize_min = hpsize;
            }
        } else {
            *hpsize_min = getpagesize();
        }
    }

    return 0;
}

static long getrampagesize(void)
{
    long hpsize = LONG_MAX;
    long mainrampagesize;
    Object *memdev_root;

    if (mem_path) {
        mainrampagesize = gethugepagesize(mem_path);
    } else {
        mainrampagesize = getpagesize();
    }

    /* it's possible we have memory-backend objects with
     * hugepage-backed RAM. these may get mapped into system
     * address space via -numa parameters or memory hotplug
     * hooks. we want to take these into account, but we
     * also want to make sure these supported hugepage
     * sizes are applicable across the entire range of memory
     * we may boot from, so we take the min across all
     * backends, and assume normal pages in cases where a
     * backend isn't backed by hugepages.
     */
    memdev_root = object_resolve_path("/objects", NULL);
    if (memdev_root) {
        object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
    }
    if (hpsize == LONG_MAX) {
        /* No additional memory regions found ==> Report main RAM page size */
        return mainrampagesize;
    }

    /* If NUMA is disabled or the NUMA nodes are not backed with a
     * memory-backend, then there is at least one node using "normal" RAM,
     * so if its page size is smaller we have got to report that size instead.
     */
    if (hpsize > mainrampagesize &&
        (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
        static bool warned;
        if (!warned) {
            error_report("Huge page support disabled (n/a for main memory).");
            warned = true;
        }
        return mainrampagesize;
    }

    return hpsize;
}
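/*
 * Worked example of the policy above (illustrative numbers, not taken
 * from the source): main RAM on 64K normal pages (mainrampagesize =
 * 64K) plus one memory-backend on 16M hugetlbfs gives hpsize = 16M;
 * since 16M > 64K and node 0 has no memdev, we still report 64K,
 * otherwise the guest could be told about page sizes that the un-backed
 * node cannot honour.
 */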
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}

static long max_cpu_page_size;

static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    int ik, iq, jk, jq;
    bool has_64k_pages = false;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    if (!max_cpu_page_size) {
        max_cpu_page_size = getrampagesize();
    }

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            if (ksps->enc[jk].page_shift == 16) {
                has_64k_pages = true;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
    if (!has_64k_pages) {
        env->mmu_model &= ~POWERPC_MMU_64K;
    }
}
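/*
 * The ik/iq (and jk/jq) index pairs above implement a compaction: ik
 * walks every slot KVM reported, while iq only advances for slots that
 * survive the kvm_valid_page_size() filter, so env->sps ends up dense
 * with no holes. A sketch of the net effect, assuming a 64K-backed
 * guest on HV KVM: KVM reports {4K, 64K, 16M}, the 16M segment fails
 * the backing-store check and is dropped, and QEMU keeps {4K, 64K}.
 */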
bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
{
    Object *mem_obj = object_resolve_path(obj_path, NULL);
    char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
    long pagesize;

    if (mempath) {
        pagesize = gethugepagesize(mempath);
    } else {
        pagesize = getpagesize();
    }

    return pagesize >= max_cpu_page_size;
}

#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
{
    return true;
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
}

/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;

static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        /* This target supports access to KVM's guest TLB */
        ret = kvm_booke206_tlb_init(cpu);
        break;
    case POWERPC_MMU_2_07:
        if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
            /* KVM-HV has transactional memory on POWER8 also without the
             * KVM_CAP_PPC_HTM extension, so enable it here instead. */
            cap_htm = true;
        }
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}

static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}
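/*
 * The KVM_REG_SIZE_MASK switches above work because a ONE_REG id is
 * self-describing: the 64-bit id encodes the architecture, the register
 * size (KVM_REG_SIZE_U32, KVM_REG_SIZE_U64, ...) and the register
 * number in separate bit fields, so the helpers can pick the right
 * union member without any per-SPR table. For illustration only:
 *
 *     if ((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
 *         // the kernel copies exactly 8 bytes through reg.addr
 *     }
 */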
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i,
                        strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
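/*
 * Layout note for the vsr[2] staging buffer used above: each VSX
 * register is transferred as two 64-bit doublewords, with the
 * traditional FPR occupying the most-significant doubleword. QEMU
 * stores the halves separately (env->fpr[] and env->vsr[]), so on a
 * little-endian host the pair has to be swapped before the
 * KVM_SET_ONE_REG call - that is all the HOST_WORDS_BIGENDIAN
 * conditional does.
 */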
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
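/*
 * The two VPA_ADDR writes in kvm_put_vpa() bracket the SLB shadow and
 * DTL updates on purpose: registering happens master-VPA-first, while
 * deregistering (vpa_addr == 0) must tear the children down before the
 * master, matching the ordering constraint stated in the comment above.
 */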
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}
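/*
 * Example of the BAT packing done above (illustrative values): with
 * DBAT0U (env->DBAT[0][0]) = 0x00001ffe and DBAT0L (env->DBAT[1][0])
 * = 0x00000002, the value handed to KVM is 0x00001ffe00000002 - the
 * upper register lands in the high word and the lower register in the
 * low word, hence the "swap upper and lower" warning.
 */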
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}
static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}

static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);

        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}
*cpu
)
1244 CPUPPCState
*env
= &cpu
->env
;
1245 struct kvm_sregs sregs
;
1249 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1255 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1261 * The packed SLB array we get from KVM_GET_SREGS only contains
1262 * information about valid entries. So we flush our internal copy
1263 * to get rid of stale ones, then put all valid SLB entries back
1266 memset(env
->slb
, 0, sizeof(env
->slb
));
1267 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1268 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1269 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1271 * Only restore valid entries
1273 if (rb
& SLB_ESID_V
) {
1274 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1280 for (i
= 0; i
< 16; i
++) {
1281 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1285 for (i
= 0; i
< 8; i
++) {
1286 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1287 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1288 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1289 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif
    }

    return 0;
}
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (NANOSECONDS_PER_SECOND / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
}
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint32_t sc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}

static int find_hw_breakpoint(target_ulong addr, int type)
{
    int n;

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));

    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
        if (hw_debug_points[n].addr == addr &&
            hw_debug_points[n].type == type) {
            return n;
        }
    }

    return -1;
}

static int find_hw_watchpoint(target_ulong addr, int *flag)
{
    int n;

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
    if (n >= 0) {
        *flag = BP_MEM_ACCESS;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
    if (n >= 0) {
        *flag = BP_MEM_WRITE;
        return n;
    }

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
    if (n >= 0) {
        *flag = BP_MEM_READ;
        return n;
    }

    return -1;
}

int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}

void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}

void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    /* Hardware Breakpoint and Watchpoint updates */
    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle the debug exception, so inject a
         * program exception into the guest; yes, a program exception,
         * NOT a debug exception !!
         * When QEMU is using debug resources then the debug exception must
         * always be set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so the guest cannot change MSR_DE.
         * When emulating debug resources for the guest we want the guest
         * to control MSR_DE (enable/disable the debug interrupt on demand).
         * Supporting both configurations at the same time is not possible,
         * so the result is that we cannot share debug resources between
         * QEMU and the guest on BOOKE architecture.
         * In the current design QEMU gets priority over the guest: this
         * means that if QEMU is using debug resources then the guest
         * cannot use them.
         * For software breakpoints QEMU uses a privileged instruction, so
         * there is no way we got here because the guest set a debug
         * exception; the only possibility is that the guest executed a
         * privileged / illegal instruction, and that is why we are
         * injecting a program interrupt.
         */
        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = NANOSECONDS_PER_SECOND;

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    return atoi(ns);
}

bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}

bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX], *tmp;
    uint64_t val;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    tmp = g_strdup_printf("%s/%s", buf, propname);
    val = kvmppc_read_int_dt(tmp);
    g_free(tmp);

    return val;
}
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
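/*
 * Usage sketch for the helpers above (hypothetical property name, for
 * illustration only - it is not queried anywhere in this file):
 *
 *     uint64_t ticks = kvmppc_read_int_cpu_dt("timebase-frequency");
 *     if (!ticks) {
 *         // property missing or unreadable; fall back to a default
 *     }
 *
 * The value arrives big-endian from the device tree and is already
 * byte-swapped by kvmppc_read_int_dt().
 */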
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 1;
}
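/*
 * How the fallback sequence above works in either endianness (a
 * reading of the encodings; the opcodes themselves come from the
 * source): read in the intended byte order, hc[0] is a never-trapping
 * tdi (a nop), hc[1] sets r3 = -1, and hc[2] branches past the swapped
 * copy. Read byte-swapped, hc[0] itself decodes as "b .+8" and jumps
 * to hc[2], which now decodes as the nop tdi, falling through to the
 * swapped "li r3, -1" at hc[3]. Either way the guest sees every
 * hypercall fail with -1.
 */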
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}

void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}

void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        error_report("This vCPU type or KVM version does not support PAPR");
        exit(1);
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        error_report("This KVM version does not support EPR");
        exit(1);
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

off_t kvmppc_alloc_rma(void **rma)
{
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    return size;
}
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
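/*
 * Worked example for the clamp above (illustrative numbers): with 16M
 * backing pages (best_page_shift = 24) and a 28-bit hash table
 * (hash_shift = 28), the RMA is capped at 1ULL << (24 + 28 - 7) =
 * 1ULL << 45 bytes - effectively no limit - while 4K pages (shift 12)
 * with the same HPT give 1ULL << 33 = 8 GiB. The "- 7" appears to
 * encode an architected factor-of-128 relationship between HPT size
 * and the maximum real mode area; that interpretation is ours, not
 * spelled out in the source.
 */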
bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
                              bool need_vfio)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;

        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
    dc->cannot_destroy_with_object_finalize_yet = true;
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    PowerPCCPUClass *pvr_pcc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

#if defined(TARGET_PPC64)
    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
    type_info.parent = TYPE_SPAPR_CPU_CORE;
    type_info.instance_size = sizeof(sPAPRCPUCore);
    type_info.instance_init = NULL;
    type_info.class_init = spapr_cpu_core_class_init;
    type_info.class_data = (void *) "host";
    type_register(&type_info);
    g_free((void *)type_info.name);
#endif

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
            if (suffix) {
                *suffix = 0;
            }
            ppc_cpu_aliases[i].oc = oc;
            break;
        }
    }

    return 0;
}
, const char *function
)
2486 struct kvm_rtas_token_args args
= {
2490 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2494 strncpy(args
.name
, function
, sizeof(args
.name
));
2496 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2499 int kvmppc_get_htab_fd(bool write
)
2501 struct kvm_get_htab_fd s
= {
2502 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2507 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2511 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2514 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2516 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2517 uint8_t buf
[bufsize
];
2521 rc
= read(fd
, buf
, bufsize
);
2523 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2527 uint8_t *buffer
= buf
;
2530 struct kvm_get_htab_header
*head
=
2531 (struct kvm_get_htab_header
*) buffer
;
2532 size_t chunksize
= sizeof(*head
) +
2533 HASH_PTE_SIZE_64
* head
->n_valid
;
2535 qemu_put_be32(f
, head
->index
);
2536 qemu_put_be16(f
, head
->n_valid
);
2537 qemu_put_be16(f
, head
->n_invalid
);
2538 qemu_put_buffer(f
, (void *)(head
+ 1),
2539 HASH_PTE_SIZE_64
* head
->n_valid
);
2541 buffer
+= chunksize
;
2547 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2549 return (rc
== 0) ? 1 : 0;
2552 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2553 uint16_t n_valid
, uint16_t n_invalid
)
2555 struct kvm_get_htab_header
*buf
;
2556 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2559 buf
= alloca(chunksize
);
2561 buf
->n_valid
= n_valid
;
2562 buf
->n_invalid
= n_invalid
;
2564 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2566 rc
= write(fd
, buf
, chunksize
);
2568 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2572 if (rc
!= chunksize
) {
2573 /* We should never get a short write on a single chunk */
2574 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2580 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2585 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2590 int kvm_arch_on_sigbus(int code
, void *addr
)
2595 void kvm_arch_init_irq_routing(KVMState
*s
)
2599 void kvmppc_read_hptes(ppc_hash_pte64_t
*hptes
, hwaddr ptex
, int n
)
2601 struct kvm_get_htab_fd ghf
= {
2603 .start_index
= ptex
,
2608 fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2610 hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2615 struct kvm_get_htab_header
*hdr
;
2616 int m
= n
< HPTES_PER_GROUP
? n
: HPTES_PER_GROUP
;
2617 char buf
[sizeof(*hdr
) + m
* HASH_PTE_SIZE_64
];
2619 rc
= read(fd
, buf
, sizeof(buf
));
2621 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2624 hdr
= (struct kvm_get_htab_header
*)buf
;
2625 while ((i
< n
) && ((char *)hdr
< (buf
+ rc
))) {
2626 int invalid
= hdr
->n_invalid
;
2628 if (hdr
->index
!= (ptex
+ i
)) {
2629 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2630 " != (%"HWADDR_PRIu
" + %d", hdr
->index
, ptex
, i
);
2633 memcpy(hptes
+ i
, hdr
+ 1, HASH_PTE_SIZE_64
* hdr
->n_valid
);
2636 if ((n
- i
) < invalid
) {
2639 memset(hptes
+ i
, 0, invalid
* HASH_PTE_SIZE_64
);
2640 i
+= hdr
->n_invalid
;
2642 hdr
= (struct kvm_get_htab_header
*)
2643 ((char *)(hdr
+ 1) + HASH_PTE_SIZE_64
* hdr
->n_valid
);
2650 void kvmppc_write_hpte(hwaddr ptex
, uint64_t pte0
, uint64_t pte1
)
2653 struct kvm_get_htab_fd ghf
;
2655 struct kvm_get_htab_header hdr
;
2661 ghf
.start_index
= 0; /* Ignored */
2662 fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2664 hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2667 buf
.hdr
.n_valid
= 1;
2668 buf
.hdr
.n_invalid
= 0;
2669 buf
.hdr
.index
= ptex
;
2670 buf
.pte0
= cpu_to_be64(pte0
);
2671 buf
.pte1
= cpu_to_be64(pte1
);
2673 rc
= write(fd
, &buf
, sizeof(buf
));
2674 if (rc
!= sizeof(buf
)) {
2675 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2680 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2681 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
2686 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry
*route
,
2687 int vector
, PCIDevice
*dev
)
2692 int kvm_arch_release_virq_post(int virq
)
2697 int kvm_arch_msi_data_to_gsi(uint32_t data
)
2699 return data
& 0xffff;
2702 int kvmppc_enable_hwrng(void)
2704 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2708 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);