2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
54 #define DPRINTF(fmt, ...) \
55 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #define DPRINTF(fmt, ...) \
61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
67 static int cap_interrupt_unset
= false;
68 static int cap_interrupt_level
= false;
69 static int cap_segstate
;
70 static int cap_booke_sregs
;
71 static int cap_ppc_smt
;
72 static int cap_ppc_rma
;
73 static int cap_spapr_tce
;
74 static int cap_spapr_multitce
;
75 static int cap_spapr_vfio
;
77 static int cap_one_reg
;
79 static int cap_ppc_watchdog
;
81 static int cap_htab_fd
;
82 static int cap_fixup_hcalls
;
83 static int cap_htm
; /* Hardware transactional memory support */
85 static uint32_t debug_inst_opcode
;
87 /* XXX We have a race condition where we actually have a level triggered
88 * interrupt, but the infrastructure can't expose that yet, so the guest
89 * takes but ignores it, goes to sleep and never gets notified that there's
90 * still an interrupt pending.
92 * As a quick workaround, let's just wake up again 20 ms after we injected
93 * an interrupt. That way we can assure that we're always reinjecting
94 * interrupts in case the guest swallowed them.
96 static QEMUTimer
*idle_timer
;
98 static void kvm_kick_cpu(void *opaque
)
100 PowerPCCPU
*cpu
= opaque
;
102 qemu_cpu_kick(CPU(cpu
));
105 static int kvm_ppc_register_host_cpu_type(void);
107 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
109 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
110 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
111 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
112 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
113 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
114 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
115 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
116 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
117 cap_spapr_vfio
= false;
118 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
119 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
120 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
121 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
122 /* Note: we don't set cap_papr here, because this capability is
123 * only activated after this by kvmppc_set_papr() */
124 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
125 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
126 cap_htm
= kvm_vm_check_extension(s
, KVM_CAP_PPC_HTM
);
128 if (!cap_interrupt_level
) {
129 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
130 "VM to stall at times!\n");
133 kvm_ppc_register_host_cpu_type();
138 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
140 CPUPPCState
*cenv
= &cpu
->env
;
141 CPUState
*cs
= CPU(cpu
);
142 struct kvm_sregs sregs
;
145 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
146 /* What we're really trying to say is "if we're on BookE, we use
147 the native PVR for now". This is the only sane way to check
148 it though, so we potentially confuse users that they can run
149 BookE guests on BookS. Let's hope nobody dares enough :) */
153 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
158 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
163 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
164 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
167 /* Set up a shared TLB array with KVM */
168 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
170 CPUPPCState
*env
= &cpu
->env
;
171 CPUState
*cs
= CPU(cpu
);
172 struct kvm_book3e_206_tlb_params params
= {};
173 struct kvm_config_tlb cfg
= {};
174 unsigned int entries
= 0;
177 if (!kvm_enabled() ||
178 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
182 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
184 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
185 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
186 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
187 entries
+= params
.tlb_sizes
[i
];
190 assert(entries
== env
->nb_tlb
);
191 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
193 env
->tlb_dirty
= true;
195 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
196 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
197 cfg
.params
= (uintptr_t)¶ms
;
198 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
200 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
202 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
203 __func__
, strerror(-ret
));
207 env
->kvm_sw_tlb
= true;
212 #if defined(TARGET_PPC64)
213 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
214 struct kvm_ppc_smmu_info
*info
)
216 CPUPPCState
*env
= &cpu
->env
;
217 CPUState
*cs
= CPU(cpu
);
219 memset(info
, 0, sizeof(*info
));
221 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
222 * need to "guess" what the supported page sizes are.
224 * For that to work we make a few assumptions:
226 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
227 * KVM which only supports 4K and 16M pages, but supports them
228 * regardless of the backing store characteritics. We also don't
229 * support 1T segments.
231 * This is safe as if HV KVM ever supports that capability or PR
232 * KVM grows supports for more page/segment sizes, those versions
233 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
234 * will not hit this fallback
236 * - Else we are running HV KVM. This means we only support page
237 * sizes that fit in the backing store. Additionally we only
238 * advertize 64K pages if the processor is ARCH 2.06 and we assume
239 * P7 encodings for the SLB and hash table. Here too, we assume
240 * support for any newer processor will mean a kernel that
241 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
244 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
249 /* Standard 4k base page size segment */
250 info
->sps
[0].page_shift
= 12;
251 info
->sps
[0].slb_enc
= 0;
252 info
->sps
[0].enc
[0].page_shift
= 12;
253 info
->sps
[0].enc
[0].pte_enc
= 0;
255 /* Standard 16M large page size segment */
256 info
->sps
[1].page_shift
= 24;
257 info
->sps
[1].slb_enc
= SLB_VSID_L
;
258 info
->sps
[1].enc
[0].page_shift
= 24;
259 info
->sps
[1].enc
[0].pte_enc
= 0;
263 /* HV KVM has backing store size restrictions */
264 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
266 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
267 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
270 if (env
->mmu_model
== POWERPC_MMU_2_06
||
271 env
->mmu_model
== POWERPC_MMU_2_07
) {
277 /* Standard 4k base page size segment */
278 info
->sps
[i
].page_shift
= 12;
279 info
->sps
[i
].slb_enc
= 0;
280 info
->sps
[i
].enc
[0].page_shift
= 12;
281 info
->sps
[i
].enc
[0].pte_enc
= 0;
284 /* 64K on MMU 2.06 and later */
285 if (env
->mmu_model
== POWERPC_MMU_2_06
||
286 env
->mmu_model
== POWERPC_MMU_2_07
) {
287 info
->sps
[i
].page_shift
= 16;
288 info
->sps
[i
].slb_enc
= 0x110;
289 info
->sps
[i
].enc
[0].page_shift
= 16;
290 info
->sps
[i
].enc
[0].pte_enc
= 1;
294 /* Standard 16M large page size segment */
295 info
->sps
[i
].page_shift
= 24;
296 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
297 info
->sps
[i
].enc
[0].page_shift
= 24;
298 info
->sps
[i
].enc
[0].pte_enc
= 0;
302 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
304 CPUState
*cs
= CPU(cpu
);
307 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
308 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
314 kvm_get_fallback_smmu_info(cpu
, info
);
317 static long gethugepagesize(const char *mem_path
)
323 ret
= statfs(mem_path
, &fs
);
324 } while (ret
!= 0 && errno
== EINTR
);
327 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
332 #define HUGETLBFS_MAGIC 0x958458f6
334 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
335 /* Explicit mempath, but it's ordinary pages */
336 return getpagesize();
339 /* It's hugepage, return the huge page size */
344 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
345 * may or may not name the same files / on the same filesystem now as
346 * when we actually open and map them. Iterate over the file
347 * descriptors instead, and use qemu_fd_getpagesize().
349 static int find_max_supported_pagesize(Object
*obj
, void *opaque
)
352 long *hpsize_min
= opaque
;
354 if (object_dynamic_cast(obj
, TYPE_MEMORY_BACKEND
)) {
355 mem_path
= object_property_get_str(obj
, "mem-path", NULL
);
357 long hpsize
= gethugepagesize(mem_path
);
358 if (hpsize
< *hpsize_min
) {
359 *hpsize_min
= hpsize
;
362 *hpsize_min
= getpagesize();
369 static long getrampagesize(void)
371 long hpsize
= LONG_MAX
;
372 long mainrampagesize
;
376 mainrampagesize
= gethugepagesize(mem_path
);
378 mainrampagesize
= getpagesize();
381 /* it's possible we have memory-backend objects with
382 * hugepage-backed RAM. these may get mapped into system
383 * address space via -numa parameters or memory hotplug
384 * hooks. we want to take these into account, but we
385 * also want to make sure these supported hugepage
386 * sizes are applicable across the entire range of memory
387 * we may boot from, so we take the min across all
388 * backends, and assume normal pages in cases where a
389 * backend isn't backed by hugepages.
391 memdev_root
= object_resolve_path("/objects", NULL
);
393 object_child_foreach(memdev_root
, find_max_supported_pagesize
, &hpsize
);
395 if (hpsize
== LONG_MAX
) {
396 /* No additional memory regions found ==> Report main RAM page size */
397 return mainrampagesize
;
400 /* If NUMA is disabled or the NUMA nodes are not backed with a
401 * memory-backend, then there is at least one node using "normal" RAM,
402 * so if its page size is smaller we have got to report that size instead.
404 if (hpsize
> mainrampagesize
&&
405 (nb_numa_nodes
== 0 || numa_info
[0].node_memdev
== NULL
)) {
408 error_report("Huge page support disabled (n/a for main memory).");
411 return mainrampagesize
;
417 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
419 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
423 return (1ul << shift
) <= rampgsize
;
426 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
428 static struct kvm_ppc_smmu_info smmu_info
;
429 static bool has_smmu_info
;
430 CPUPPCState
*env
= &cpu
->env
;
433 bool has_64k_pages
= false;
435 /* We only handle page sizes for 64-bit server guests for now */
436 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
440 /* Collect MMU info from kernel if not already */
441 if (!has_smmu_info
) {
442 kvm_get_smmu_info(cpu
, &smmu_info
);
443 has_smmu_info
= true;
446 rampagesize
= getrampagesize();
448 /* Convert to QEMU form */
449 memset(&env
->sps
, 0, sizeof(env
->sps
));
451 /* If we have HV KVM, we need to forbid CI large pages if our
452 * host page size is smaller than 64K.
454 if (smmu_info
.flags
& KVM_PPC_PAGE_SIZES_REAL
) {
455 env
->ci_large_pages
= getpagesize() >= 0x10000;
459 * XXX This loop should be an entry wide AND of the capabilities that
460 * the selected CPU has with the capabilities that KVM supports.
462 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
463 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
464 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
466 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
470 qsps
->page_shift
= ksps
->page_shift
;
471 qsps
->slb_enc
= ksps
->slb_enc
;
472 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
473 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
474 ksps
->enc
[jk
].page_shift
)) {
477 if (ksps
->enc
[jk
].page_shift
== 16) {
478 has_64k_pages
= true;
480 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
481 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
482 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
486 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
490 env
->slb_nr
= smmu_info
.slb_size
;
491 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
492 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
494 if (!has_64k_pages
) {
495 env
->mmu_model
&= ~POWERPC_MMU_64K
;
498 #else /* defined (TARGET_PPC64) */
500 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
504 #endif /* !defined (TARGET_PPC64) */
506 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
508 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
511 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
512 * book3s supports only 1 watchpoint, so array size
513 * of 4 is sufficient for now.
515 #define MAX_HW_BKPTS 4
517 static struct HWBreakpoint
{
520 } hw_debug_points
[MAX_HW_BKPTS
];
522 static CPUWatchpoint hw_watchpoint
;
524 /* Default there is no breakpoint and watchpoint supported */
525 static int max_hw_breakpoint
;
526 static int max_hw_watchpoint
;
527 static int nb_hw_breakpoint
;
528 static int nb_hw_watchpoint
;
530 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
532 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
533 max_hw_breakpoint
= 2;
534 max_hw_watchpoint
= 2;
537 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
538 fprintf(stderr
, "Error initializing h/w breakpoints\n");
543 int kvm_arch_init_vcpu(CPUState
*cs
)
545 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
546 CPUPPCState
*cenv
= &cpu
->env
;
549 /* Gather server mmu info from KVM and update the CPU state */
550 kvm_fixup_page_sizes(cpu
);
552 /* Synchronize sregs with kvm */
553 ret
= kvm_arch_sync_sregs(cpu
);
555 if (ret
== -EINVAL
) {
556 error_report("Register sync failed... If you're using kvm-hv.ko,"
557 " only \"-cpu host\" is possible");
562 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
564 /* Some targets support access to KVM's guest TLB. */
565 switch (cenv
->mmu_model
) {
566 case POWERPC_MMU_BOOKE206
:
567 ret
= kvm_booke206_tlb_init(cpu
);
573 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
574 kvmppc_hw_debug_points_init(cenv
);
579 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
581 CPUPPCState
*env
= &cpu
->env
;
582 CPUState
*cs
= CPU(cpu
);
583 struct kvm_dirty_tlb dirty_tlb
;
584 unsigned char *bitmap
;
587 if (!env
->kvm_sw_tlb
) {
591 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
592 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
594 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
595 dirty_tlb
.num_dirty
= env
->nb_tlb
;
597 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
599 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
600 __func__
, strerror(-ret
));
606 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
608 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
609 CPUPPCState
*env
= &cpu
->env
;
614 struct kvm_one_reg reg
= {
616 .addr
= (uintptr_t) &val
,
620 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
622 trace_kvm_failed_spr_get(spr
, strerror(errno
));
624 switch (id
& KVM_REG_SIZE_MASK
) {
625 case KVM_REG_SIZE_U32
:
626 env
->spr
[spr
] = val
.u32
;
629 case KVM_REG_SIZE_U64
:
630 env
->spr
[spr
] = val
.u64
;
634 /* Don't handle this size yet */
640 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
642 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
643 CPUPPCState
*env
= &cpu
->env
;
648 struct kvm_one_reg reg
= {
650 .addr
= (uintptr_t) &val
,
654 switch (id
& KVM_REG_SIZE_MASK
) {
655 case KVM_REG_SIZE_U32
:
656 val
.u32
= env
->spr
[spr
];
659 case KVM_REG_SIZE_U64
:
660 val
.u64
= env
->spr
[spr
];
664 /* Don't handle this size yet */
668 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
670 trace_kvm_failed_spr_set(spr
, strerror(errno
));
674 static int kvm_put_fp(CPUState
*cs
)
676 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
677 CPUPPCState
*env
= &cpu
->env
;
678 struct kvm_one_reg reg
;
682 if (env
->insns_flags
& PPC_FLOAT
) {
683 uint64_t fpscr
= env
->fpscr
;
684 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
686 reg
.id
= KVM_REG_PPC_FPSCR
;
687 reg
.addr
= (uintptr_t)&fpscr
;
688 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
690 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
694 for (i
= 0; i
< 32; i
++) {
697 #ifdef HOST_WORDS_BIGENDIAN
698 vsr
[0] = float64_val(env
->fpr
[i
]);
699 vsr
[1] = env
->vsr
[i
];
701 vsr
[0] = env
->vsr
[i
];
702 vsr
[1] = float64_val(env
->fpr
[i
]);
704 reg
.addr
= (uintptr_t) &vsr
;
705 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
707 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
709 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
716 if (env
->insns_flags
& PPC_ALTIVEC
) {
717 reg
.id
= KVM_REG_PPC_VSCR
;
718 reg
.addr
= (uintptr_t)&env
->vscr
;
719 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
721 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
725 for (i
= 0; i
< 32; i
++) {
726 reg
.id
= KVM_REG_PPC_VR(i
);
727 reg
.addr
= (uintptr_t)&env
->avr
[i
];
728 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
730 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
739 static int kvm_get_fp(CPUState
*cs
)
741 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
742 CPUPPCState
*env
= &cpu
->env
;
743 struct kvm_one_reg reg
;
747 if (env
->insns_flags
& PPC_FLOAT
) {
749 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
751 reg
.id
= KVM_REG_PPC_FPSCR
;
752 reg
.addr
= (uintptr_t)&fpscr
;
753 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
755 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
761 for (i
= 0; i
< 32; i
++) {
764 reg
.addr
= (uintptr_t) &vsr
;
765 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
767 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
769 DPRINTF("Unable to get %s%d from KVM: %s\n",
770 vsx
? "VSR" : "FPR", i
, strerror(errno
));
773 #ifdef HOST_WORDS_BIGENDIAN
774 env
->fpr
[i
] = vsr
[0];
776 env
->vsr
[i
] = vsr
[1];
779 env
->fpr
[i
] = vsr
[1];
781 env
->vsr
[i
] = vsr
[0];
788 if (env
->insns_flags
& PPC_ALTIVEC
) {
789 reg
.id
= KVM_REG_PPC_VSCR
;
790 reg
.addr
= (uintptr_t)&env
->vscr
;
791 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
793 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
797 for (i
= 0; i
< 32; i
++) {
798 reg
.id
= KVM_REG_PPC_VR(i
);
799 reg
.addr
= (uintptr_t)&env
->avr
[i
];
800 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
802 DPRINTF("Unable to get VR%d from KVM: %s\n",
812 #if defined(TARGET_PPC64)
813 static int kvm_get_vpa(CPUState
*cs
)
815 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
816 CPUPPCState
*env
= &cpu
->env
;
817 struct kvm_one_reg reg
;
820 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
821 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
822 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
824 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
828 assert((uintptr_t)&env
->slb_shadow_size
829 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
830 reg
.id
= KVM_REG_PPC_VPA_SLB
;
831 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
832 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
834 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
839 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
840 reg
.id
= KVM_REG_PPC_VPA_DTL
;
841 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
842 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
844 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
852 static int kvm_put_vpa(CPUState
*cs
)
854 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
855 CPUPPCState
*env
= &cpu
->env
;
856 struct kvm_one_reg reg
;
859 /* SLB shadow or DTL can't be registered unless a master VPA is
860 * registered. That means when restoring state, if a VPA *is*
861 * registered, we need to set that up first. If not, we need to
862 * deregister the others before deregistering the master VPA */
863 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
866 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
867 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
868 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
870 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
875 assert((uintptr_t)&env
->slb_shadow_size
876 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
877 reg
.id
= KVM_REG_PPC_VPA_SLB
;
878 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
879 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
881 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
885 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
886 reg
.id
= KVM_REG_PPC_VPA_DTL
;
887 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
888 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
890 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
895 if (!env
->vpa_addr
) {
896 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
897 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
898 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
900 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
907 #endif /* TARGET_PPC64 */
909 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
911 CPUPPCState
*env
= &cpu
->env
;
912 struct kvm_sregs sregs
;
915 sregs
.pvr
= env
->spr
[SPR_PVR
];
917 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
921 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
922 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
923 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
924 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
926 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
931 for (i
= 0; i
< 16; i
++) {
932 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
936 for (i
= 0; i
< 8; i
++) {
937 /* Beware. We have to swap upper and lower bits here */
938 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
940 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
944 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
947 int kvm_arch_put_registers(CPUState
*cs
, int level
)
949 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
950 CPUPPCState
*env
= &cpu
->env
;
951 struct kvm_regs regs
;
955 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
962 regs
.xer
= cpu_read_xer(env
);
966 regs
.srr0
= env
->spr
[SPR_SRR0
];
967 regs
.srr1
= env
->spr
[SPR_SRR1
];
969 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
970 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
971 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
972 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
973 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
974 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
975 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
976 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
978 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
980 for (i
= 0;i
< 32; i
++)
981 regs
.gpr
[i
] = env
->gpr
[i
];
984 for (i
= 0; i
< 8; i
++) {
985 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
988 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
994 if (env
->tlb_dirty
) {
996 env
->tlb_dirty
= false;
999 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
1000 ret
= kvmppc_put_books_sregs(cpu
);
1006 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
1007 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1013 /* We deliberately ignore errors here, for kernels which have
1014 * the ONE_REG calls, but don't support the specific
1015 * registers, there's a reasonable chance things will still
1016 * work, at least until we try to migrate. */
1017 for (i
= 0; i
< 1024; i
++) {
1018 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1021 kvm_put_one_spr(cs
, id
, i
);
1027 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1028 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1030 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1031 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1033 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1034 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1035 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1036 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1037 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1038 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1039 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1040 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1041 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1042 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1046 if (kvm_put_vpa(cs
) < 0) {
1047 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1051 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1052 #endif /* TARGET_PPC64 */
1058 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1060 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1063 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1065 CPUPPCState
*env
= &cpu
->env
;
1066 struct kvm_sregs sregs
;
1069 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1074 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1075 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1076 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1077 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1078 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1079 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1080 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1081 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1082 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1083 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1084 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1085 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1088 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1089 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1090 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1091 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1092 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1093 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1096 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1097 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1100 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1101 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1104 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1105 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1106 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1107 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1108 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1109 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1110 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1111 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1112 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1113 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1114 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1115 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1116 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1117 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1118 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1119 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1120 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1121 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1122 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1123 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1124 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1125 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1126 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1127 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1128 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1129 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1130 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1131 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1132 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1133 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1134 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1135 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1136 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1138 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1139 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1140 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1141 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1142 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1143 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1144 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1147 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1148 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1149 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1152 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1153 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1154 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1155 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1156 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1160 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1161 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1162 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1163 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1164 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1165 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1166 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1167 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1168 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1169 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1170 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1173 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1174 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1177 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1178 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1179 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1182 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1183 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1184 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1185 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1187 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1188 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1189 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1196 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1198 CPUPPCState
*env
= &cpu
->env
;
1199 struct kvm_sregs sregs
;
1203 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1208 if (!env
->external_htab
) {
1209 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1215 * The packed SLB array we get from KVM_GET_SREGS only contains
1216 * information about valid entries. So we flush our internal copy
1217 * to get rid of stale ones, then put all valid SLB entries back
1220 memset(env
->slb
, 0, sizeof(env
->slb
));
1221 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1222 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1223 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1225 * Only restore valid entries
1227 if (rb
& SLB_ESID_V
) {
1228 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1234 for (i
= 0; i
< 16; i
++) {
1235 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1239 for (i
= 0; i
< 8; i
++) {
1240 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1241 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1242 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1243 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1249 int kvm_arch_get_registers(CPUState
*cs
)
1251 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1252 CPUPPCState
*env
= &cpu
->env
;
1253 struct kvm_regs regs
;
1257 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1262 for (i
= 7; i
>= 0; i
--) {
1263 env
->crf
[i
] = cr
& 15;
1267 env
->ctr
= regs
.ctr
;
1269 cpu_write_xer(env
, regs
.xer
);
1270 env
->msr
= regs
.msr
;
1273 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1274 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1276 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1277 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1278 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1279 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1280 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1281 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1282 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1283 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1285 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1287 for (i
= 0;i
< 32; i
++)
1288 env
->gpr
[i
] = regs
.gpr
[i
];
1292 if (cap_booke_sregs
) {
1293 ret
= kvmppc_get_booke_sregs(cpu
);
1300 ret
= kvmppc_get_books_sregs(cpu
);
1307 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1313 /* We deliberately ignore errors here, for kernels which have
1314 * the ONE_REG calls, but don't support the specific
1315 * registers, there's a reasonable chance things will still
1316 * work, at least until we try to migrate. */
1317 for (i
= 0; i
< 1024; i
++) {
1318 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1321 kvm_get_one_spr(cs
, id
, i
);
1327 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1328 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1330 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1331 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1333 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1334 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1335 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1336 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1337 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1338 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1339 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1340 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1341 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1342 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1346 if (kvm_get_vpa(cs
) < 0) {
1347 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1351 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1358 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1360 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1362 if (irq
!= PPC_INTERRUPT_EXT
) {
1366 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1370 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1375 #if defined(TARGET_PPCEMB)
1376 #define PPC_INPUT_INT PPC40x_INPUT_INT
1377 #elif defined(TARGET_PPC64)
1378 #define PPC_INPUT_INT PPC970_INPUT_INT
1380 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1383 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1385 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1386 CPUPPCState
*env
= &cpu
->env
;
1390 qemu_mutex_lock_iothread();
1392 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1393 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1394 if (!cap_interrupt_level
&&
1395 run
->ready_for_interrupt_injection
&&
1396 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1397 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1399 /* For now KVM disregards the 'irq' argument. However, in the
1400 * future KVM could cache it in-kernel to avoid a heavyweight exit
1401 * when reading the UIC.
1403 irq
= KVM_INTERRUPT_SET
;
1405 DPRINTF("injected interrupt %d\n", irq
);
1406 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1408 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1411 /* Always wake up soon in case the interrupt was level based */
1412 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1413 (NANOSECONDS_PER_SECOND
/ 50));
1416 /* We don't know if there are more interrupts pending after this. However,
1417 * the guest will return to userspace in the course of handling this one
1418 * anyways, so we will get a chance to deliver the rest. */
1420 qemu_mutex_unlock_iothread();
1423 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1425 return MEMTXATTRS_UNSPECIFIED
;
1428 int kvm_arch_process_async_events(CPUState
*cs
)
1433 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1435 CPUState
*cs
= CPU(cpu
);
1436 CPUPPCState
*env
= &cpu
->env
;
1438 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1440 cs
->exception_index
= EXCP_HLT
;
1446 /* map dcr access to existing qemu dcr emulation */
1447 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1449 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1450 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1455 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1457 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1458 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1463 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1465 /* Mixed endian case is not handled */
1466 uint32_t sc
= debug_inst_opcode
;
1468 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1470 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1477 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1481 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1482 sc
!= debug_inst_opcode
||
1483 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1491 static int find_hw_breakpoint(target_ulong addr
, int type
)
1495 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1496 <= ARRAY_SIZE(hw_debug_points
));
1498 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1499 if (hw_debug_points
[n
].addr
== addr
&&
1500 hw_debug_points
[n
].type
== type
) {
1508 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1512 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1514 *flag
= BP_MEM_ACCESS
;
1518 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1520 *flag
= BP_MEM_WRITE
;
1524 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1526 *flag
= BP_MEM_READ
;
1533 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1534 target_ulong len
, int type
)
1536 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1540 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1541 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1544 case GDB_BREAKPOINT_HW
:
1545 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1549 if (find_hw_breakpoint(addr
, type
) >= 0) {
1556 case GDB_WATCHPOINT_WRITE
:
1557 case GDB_WATCHPOINT_READ
:
1558 case GDB_WATCHPOINT_ACCESS
:
1559 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1563 if (find_hw_breakpoint(addr
, type
) >= 0) {
1577 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1578 target_ulong len
, int type
)
1582 n
= find_hw_breakpoint(addr
, type
);
1588 case GDB_BREAKPOINT_HW
:
1592 case GDB_WATCHPOINT_WRITE
:
1593 case GDB_WATCHPOINT_READ
:
1594 case GDB_WATCHPOINT_ACCESS
:
1601 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1606 void kvm_arch_remove_all_hw_breakpoints(void)
1608 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1611 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1615 /* Software Breakpoint updates */
1616 if (kvm_sw_breakpoints_active(cs
)) {
1617 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1620 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1621 <= ARRAY_SIZE(hw_debug_points
));
1622 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1624 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1625 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1626 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1627 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1628 switch (hw_debug_points
[n
].type
) {
1629 case GDB_BREAKPOINT_HW
:
1630 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1632 case GDB_WATCHPOINT_WRITE
:
1633 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1635 case GDB_WATCHPOINT_READ
:
1636 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1638 case GDB_WATCHPOINT_ACCESS
:
1639 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1640 KVMPPC_DEBUG_WATCH_READ
;
1643 cpu_abort(cs
, "Unsupported breakpoint type\n");
1645 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1650 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1652 CPUState
*cs
= CPU(cpu
);
1653 CPUPPCState
*env
= &cpu
->env
;
1654 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1659 if (cs
->singlestep_enabled
) {
1661 } else if (arch_info
->status
) {
1662 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1663 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1664 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1668 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1669 KVMPPC_DEBUG_WATCH_WRITE
)) {
1670 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1673 cs
->watchpoint_hit
= &hw_watchpoint
;
1674 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1675 hw_watchpoint
.flags
= flag
;
1679 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1682 /* QEMU is not able to handle debug exception, so inject
1683 * program exception to guest;
1684 * Yes program exception NOT debug exception !!
1685 * When QEMU is using debug resources then debug exception must
1686 * be always set. To achieve this we set MSR_DE and also set
1687 * MSRP_DEP so guest cannot change MSR_DE.
1688 * When emulating debug resource for guest we want guest
1689 * to control MSR_DE (enable/disable debug interrupt on need).
1690 * Supporting both configurations are NOT possible.
1691 * So the result is that we cannot share debug resources
1692 * between QEMU and Guest on BOOKE architecture.
1693 * In the current design QEMU gets the priority over guest,
1694 * this means that if QEMU is using debug resources then guest
1696 * For software breakpoint QEMU uses a privileged instruction;
1697 * So there cannot be any reason that we are here for guest
1698 * set debug exception, only possibility is guest executed a
1699 * privileged / illegal instruction and that's why we are
1700 * injecting a program interrupt.
1703 cpu_synchronize_state(cs
);
1704 /* env->nip is PC, so increment this by 4 to use
1705 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1708 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1709 env
->error_code
= POWERPC_EXCP_INVAL
;
1710 ppc_cpu_do_interrupt(cs
);
1716 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1718 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1719 CPUPPCState
*env
= &cpu
->env
;
1722 qemu_mutex_lock_iothread();
1724 switch (run
->exit_reason
) {
1726 if (run
->dcr
.is_write
) {
1727 DPRINTF("handle dcr write\n");
1728 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1730 DPRINTF("handle dcr read\n");
1731 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1735 DPRINTF("handle halt\n");
1736 ret
= kvmppc_handle_halt(cpu
);
1738 #if defined(TARGET_PPC64)
1739 case KVM_EXIT_PAPR_HCALL
:
1740 DPRINTF("handle PAPR hypercall\n");
1741 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1743 run
->papr_hcall
.args
);
1748 DPRINTF("handle epr\n");
1749 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1752 case KVM_EXIT_WATCHDOG
:
1753 DPRINTF("handle watchdog expiry\n");
1754 watchdog_perform_action();
1758 case KVM_EXIT_DEBUG
:
1759 DPRINTF("handle debug exception\n");
1760 if (kvm_handle_debug(cpu
, run
)) {
1764 /* re-enter, this exception was guest-internal */
1769 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1774 qemu_mutex_unlock_iothread();
1778 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1780 CPUState
*cs
= CPU(cpu
);
1781 uint32_t bits
= tsr_bits
;
1782 struct kvm_one_reg reg
= {
1783 .id
= KVM_REG_PPC_OR_TSR
,
1784 .addr
= (uintptr_t) &bits
,
1787 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1790 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1793 CPUState
*cs
= CPU(cpu
);
1794 uint32_t bits
= tsr_bits
;
1795 struct kvm_one_reg reg
= {
1796 .id
= KVM_REG_PPC_CLEAR_TSR
,
1797 .addr
= (uintptr_t) &bits
,
1800 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1803 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1805 CPUState
*cs
= CPU(cpu
);
1806 CPUPPCState
*env
= &cpu
->env
;
1807 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1809 struct kvm_one_reg reg
= {
1810 .id
= KVM_REG_PPC_TCR
,
1811 .addr
= (uintptr_t) &tcr
,
1814 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1817 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1819 CPUState
*cs
= CPU(cpu
);
1822 if (!kvm_enabled()) {
1826 if (!cap_ppc_watchdog
) {
1827 printf("warning: KVM does not support watchdog");
1831 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1833 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1834 __func__
, strerror(-ret
));
1841 static int read_cpuinfo(const char *field
, char *value
, int len
)
1845 int field_len
= strlen(field
);
1848 f
= fopen("/proc/cpuinfo", "r");
1854 if (!fgets(line
, sizeof(line
), f
)) {
1857 if (!strncmp(line
, field
, field_len
)) {
1858 pstrcpy(value
, len
, line
);
1869 uint32_t kvmppc_get_tbfreq(void)
1873 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1875 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1879 if (!(ns
= strchr(line
, ':'))) {
1888 bool kvmppc_get_host_serial(char **value
)
1890 return g_file_get_contents("/proc/device-tree/system-id", value
, NULL
,
1894 bool kvmppc_get_host_model(char **value
)
1896 return g_file_get_contents("/proc/device-tree/model", value
, NULL
, NULL
);
1899 /* Try to find a device tree node for a CPU with clock-frequency property */
1900 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1902 struct dirent
*dirp
;
1905 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1906 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1911 while ((dirp
= readdir(dp
)) != NULL
) {
1913 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1915 f
= fopen(buf
, "r");
1917 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1924 if (buf
[0] == '\0') {
1925 printf("Unknown host!\n");
1932 static uint64_t kvmppc_read_int_dt(const char *filename
)
1941 f
= fopen(filename
, "rb");
1946 len
= fread(&u
, 1, sizeof(u
), f
);
1950 /* property is a 32-bit quantity */
1951 return be32_to_cpu(u
.v32
);
1953 return be64_to_cpu(u
.v64
);
1959 /* Read a CPU node property from the host device tree that's a single
1960 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1961 * (can't find or open the property, or doesn't understand the
1963 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1965 char buf
[PATH_MAX
], *tmp
;
1968 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1972 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1973 val
= kvmppc_read_int_dt(tmp
);
1979 uint64_t kvmppc_get_clockfreq(void)
1981 return kvmppc_read_int_cpu_dt("clock-frequency");
1984 uint32_t kvmppc_get_vmx(void)
1986 return kvmppc_read_int_cpu_dt("ibm,vmx");
1989 uint32_t kvmppc_get_dfp(void)
1991 return kvmppc_read_int_cpu_dt("ibm,dfp");
1994 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1996 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1997 CPUState
*cs
= CPU(cpu
);
1999 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
2000 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
2007 int kvmppc_get_hasidle(CPUPPCState
*env
)
2009 struct kvm_ppc_pvinfo pvinfo
;
2011 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
2012 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
2019 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
2021 uint32_t *hc
= (uint32_t*)buf
;
2022 struct kvm_ppc_pvinfo pvinfo
;
2024 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
2025 memcpy(buf
, pvinfo
.hcall
, buf_len
);
2030 * Fallback to always fail hypercalls regardless of endianness:
2032 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2034 * b .+8 (becomes nop in wrong endian)
2035 * bswap32(li r3, -1)
2038 hc
[0] = cpu_to_be32(0x08000048);
2039 hc
[1] = cpu_to_be32(0x3860ffff);
2040 hc
[2] = cpu_to_be32(0x48000008);
2041 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
2046 static inline int kvmppc_enable_hcall(KVMState
*s
, target_ulong hcall
)
2048 return kvm_vm_enable_cap(s
, KVM_CAP_PPC_ENABLE_HCALL
, 0, hcall
, 1);
2051 void kvmppc_enable_logical_ci_hcalls(void)
2054 * FIXME: it would be nice if we could detect the cases where
2055 * we're using a device which requires the in kernel
2056 * implementation of these hcalls, but the kernel lacks them and
2057 * produce a warning.
2059 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
2060 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
2063 void kvmppc_enable_set_mode_hcall(void)
2065 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
2068 void kvmppc_enable_clear_ref_mod_hcalls(void)
2070 kvmppc_enable_hcall(kvm_state
, H_CLEAR_REF
);
2071 kvmppc_enable_hcall(kvm_state
, H_CLEAR_MOD
);
2074 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2076 CPUState
*cs
= CPU(cpu
);
2079 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2081 error_report("This vCPU type or KVM version does not support PAPR");
2085 /* Update the capability flag so we sync the right information
2090 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
2092 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
2095 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2097 CPUState
*cs
= CPU(cpu
);
2100 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2101 if (ret
&& mpic_proxy
) {
2102 error_report("This KVM version does not support EPR");
2107 int kvmppc_smt_threads(void)
2109 return cap_ppc_smt
? cap_ppc_smt
: 1;
2113 off_t
kvmppc_alloc_rma(void **rma
)
2117 struct kvm_allocate_rma ret
;
2119 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2120 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2121 * not necessary on this hardware
2122 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2124 * FIXME: We should allow the user to force contiguous RMA
2125 * allocation in the cap_ppc_rma==1 case.
2127 if (cap_ppc_rma
< 2) {
2131 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2133 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2138 size
= MIN(ret
.rma_size
, 256ul << 20);
2140 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2141 if (*rma
== MAP_FAILED
) {
2142 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2149 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2151 struct kvm_ppc_smmu_info info
;
2152 long rampagesize
, best_page_shift
;
2155 if (cap_ppc_rma
>= 2) {
2156 return current_size
;
2159 /* Find the largest hardware supported page size that's less than
2160 * or equal to the (logical) backing page size of guest RAM */
2161 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2162 rampagesize
= getrampagesize();
2163 best_page_shift
= 0;
2165 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2166 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2168 if (!sps
->page_shift
) {
2172 if ((sps
->page_shift
> best_page_shift
)
2173 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2174 best_page_shift
= sps
->page_shift
;
2178 return MIN(current_size
,
2179 1ULL << (best_page_shift
+ hash_shift
- 7));
2183 bool kvmppc_spapr_use_multitce(void)
2185 return cap_spapr_multitce
;
2188 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2191 struct kvm_create_spapr_tce args
= {
2193 .window_size
= window_size
,
2199 /* Must set fd to -1 so we don't try to munmap when called for
2200 * destroying the table, which the upper layers -will- do
2203 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2207 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2209 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2214 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2215 /* FIXME: round this up to page size */
2217 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2218 if (table
== MAP_FAILED
) {
2219 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2229 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2237 len
= nb_table
* sizeof(uint64_t);
2238 if ((munmap(table
, len
) < 0) ||
2240 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2242 /* Leak the table */
2248 int kvmppc_reset_htab(int shift_hint
)
2250 uint32_t shift
= shift_hint
;
2252 if (!kvm_enabled()) {
2253 /* Full emulation, tell caller to allocate htab itself */
2256 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2258 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2259 if (ret
== -ENOTTY
) {
2260 /* At least some versions of PR KVM advertise the
2261 * capability, but don't implement the ioctl(). Oops.
2262 * Return 0 so that we allocate the htab in qemu, as is
2263 * correct for PR. */
2265 } else if (ret
< 0) {
2271 /* We have a kernel that predates the htab reset calls. For PR
2272 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2273 * this era, it has allocated a 16MB fixed size hash table
2274 * already. Kernels of this era have the GET_PVINFO capability
2275 * only on PR, so we use this hack to determine the right
2277 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
2278 /* PR - tell caller to allocate htab */
2281 /* HV - assume 16MB kernel allocated htab */
2286 static inline uint32_t mfpvr(void)
2295 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2304 static void kvmppc_host_cpu_initfn(Object
*obj
)
2306 assert(kvm_enabled());
2309 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2311 DeviceClass
*dc
= DEVICE_CLASS(oc
);
2312 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2313 uint32_t vmx
= kvmppc_get_vmx();
2314 uint32_t dfp
= kvmppc_get_dfp();
2315 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2316 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2318 /* Now fix up the class with information we can query from the host */
2322 /* Only override when we know what the host supports */
2323 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2324 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2327 /* Only override when we know what the host supports */
2328 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2331 if (dcache_size
!= -1) {
2332 pcc
->l1_dcache_size
= dcache_size
;
2335 if (icache_size
!= -1) {
2336 pcc
->l1_icache_size
= icache_size
;
2339 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2340 dc
->cannot_destroy_with_object_finalize_yet
= true;
2343 bool kvmppc_has_cap_epr(void)
2348 bool kvmppc_has_cap_htab_fd(void)
2353 bool kvmppc_has_cap_fixup_hcalls(void)
2355 return cap_fixup_hcalls
;
2358 bool kvmppc_has_cap_htm(void)
2363 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2365 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2367 while (oc
&& !object_class_is_abstract(oc
)) {
2368 oc
= object_class_get_parent(oc
);
2372 return POWERPC_CPU_CLASS(oc
);
2375 PowerPCCPUClass
*kvm_ppc_get_host_cpu_class(void)
2377 uint32_t host_pvr
= mfpvr();
2378 PowerPCCPUClass
*pvr_pcc
;
2380 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2381 if (pvr_pcc
== NULL
) {
2382 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2388 static int kvm_ppc_register_host_cpu_type(void)
2390 TypeInfo type_info
= {
2391 .name
= TYPE_HOST_POWERPC_CPU
,
2392 .instance_init
= kvmppc_host_cpu_initfn
,
2393 .class_init
= kvmppc_host_cpu_class_init
,
2395 PowerPCCPUClass
*pvr_pcc
;
2398 pvr_pcc
= kvm_ppc_get_host_cpu_class();
2399 if (pvr_pcc
== NULL
) {
2402 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2403 type_register(&type_info
);
2405 /* Register generic family CPU class for a family */
2406 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2407 dc
= DEVICE_CLASS(pvr_pcc
);
2408 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2409 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2410 type_register(&type_info
);
2412 #if defined(TARGET_PPC64)
2413 type_info
.name
= g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE
, "host");
2414 type_info
.parent
= TYPE_SPAPR_CPU_CORE
,
2415 type_info
.instance_size
= sizeof(sPAPRCPUCore
);
2416 type_info
.instance_init
= NULL
;
2417 type_info
.class_init
= spapr_cpu_core_class_init
;
2418 type_info
.class_data
= (void *) "host";
2419 type_register(&type_info
);
2420 g_free((void *)type_info
.name
);
2422 /* Register generic spapr CPU family class for current host CPU type */
2423 type_info
.name
= g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE
, dc
->desc
);
2424 type_info
.class_data
= (void *) dc
->desc
;
2425 type_register(&type_info
);
2426 g_free((void *)type_info
.name
);
2432 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2434 struct kvm_rtas_token_args args
= {
2438 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2442 strncpy(args
.name
, function
, sizeof(args
.name
));
2444 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2447 int kvmppc_get_htab_fd(bool write
)
2449 struct kvm_get_htab_fd s
= {
2450 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2455 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2459 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2462 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2464 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2465 uint8_t buf
[bufsize
];
2469 rc
= read(fd
, buf
, bufsize
);
2471 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2475 uint8_t *buffer
= buf
;
2478 struct kvm_get_htab_header
*head
=
2479 (struct kvm_get_htab_header
*) buffer
;
2480 size_t chunksize
= sizeof(*head
) +
2481 HASH_PTE_SIZE_64
* head
->n_valid
;
2483 qemu_put_be32(f
, head
->index
);
2484 qemu_put_be16(f
, head
->n_valid
);
2485 qemu_put_be16(f
, head
->n_invalid
);
2486 qemu_put_buffer(f
, (void *)(head
+ 1),
2487 HASH_PTE_SIZE_64
* head
->n_valid
);
2489 buffer
+= chunksize
;
2495 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2497 return (rc
== 0) ? 1 : 0;
2500 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2501 uint16_t n_valid
, uint16_t n_invalid
)
2503 struct kvm_get_htab_header
*buf
;
2504 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2507 buf
= alloca(chunksize
);
2509 buf
->n_valid
= n_valid
;
2510 buf
->n_invalid
= n_invalid
;
2512 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2514 rc
= write(fd
, buf
, chunksize
);
2516 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2520 if (rc
!= chunksize
) {
2521 /* We should never get a short write on a single chunk */
2522 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2528 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2533 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2538 int kvm_arch_on_sigbus(int code
, void *addr
)
2543 void kvm_arch_init_irq_routing(KVMState
*s
)
2547 struct kvm_get_htab_buf
{
2548 struct kvm_get_htab_header header
;
2550 * We require one extra byte for read
2552 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2555 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2558 struct kvm_get_htab_fd ghf
;
2559 struct kvm_get_htab_buf
*hpte_buf
;
2562 ghf
.start_index
= pte_index
;
2563 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2568 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2570 * Read the hpte group
2572 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2577 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2586 void kvmppc_hash64_free_pteg(uint64_t token
)
2588 struct kvm_get_htab_buf
*htab_buf
;
2590 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2596 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2597 target_ulong pte0
, target_ulong pte1
)
2600 struct kvm_get_htab_fd ghf
;
2601 struct kvm_get_htab_buf hpte_buf
;
2604 ghf
.start_index
= 0; /* Ignored */
2605 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2610 hpte_buf
.header
.n_valid
= 1;
2611 hpte_buf
.header
.n_invalid
= 0;
2612 hpte_buf
.header
.index
= pte_index
;
2613 hpte_buf
.hpte
[0] = pte0
;
2614 hpte_buf
.hpte
[1] = pte1
;
2616 * Write the hpte entry.
2617 * CAUTION: write() has the warn_unused_result attribute. Hence we
2618 * need to check the return value, even though we do nothing.
2620 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2632 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2633 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
2638 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry
*route
,
2639 int vector
, PCIDevice
*dev
)
2644 int kvm_arch_release_virq_post(int virq
)
/*
 * Extract the GSI number from an MSI data word: the GSI lives in the
 * low 16 bits on PPC.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffffu;
}
2654 int kvmppc_enable_hwrng(void)
2656 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2660 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);