/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "sysemu/numa.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#if defined(TARGET_PPC64)
#include "hw/ppc/spapr_cpu_core.h"
#endif
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

static uint32_t debug_inst_opcode;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}
static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
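
/* Push the vCPU's PVR (env->spr[SPR_PVR]) into KVM's sregs so the kernel
 * models the right processor; BookE guests keep their native PVR. */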
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
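
/* The helpers below (TARGET_PPC64 only) work out which segment and page
 * sizes the host MMU / KVM can really back, either via the
 * KVM_PPC_GET_SMMU_INFO ioctl or the conservative fallback guess described
 * in the comment that follows. */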
209 #if defined(TARGET_PPC64)
210 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
211 struct kvm_ppc_smmu_info
*info
)
213 CPUPPCState
*env
= &cpu
->env
;
214 CPUState
*cs
= CPU(cpu
);
216 memset(info
, 0, sizeof(*info
));
218 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
219 * need to "guess" what the supported page sizes are.
221 * For that to work we make a few assumptions:
223 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
224 * KVM which only supports 4K and 16M pages, but supports them
225 * regardless of the backing store characteritics. We also don't
226 * support 1T segments.
228 * This is safe as if HV KVM ever supports that capability or PR
229 * KVM grows supports for more page/segment sizes, those versions
230 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
231 * will not hit this fallback
233 * - Else we are running HV KVM. This means we only support page
234 * sizes that fit in the backing store. Additionally we only
235 * advertize 64K pages if the processor is ARCH 2.06 and we assume
236 * P7 encodings for the SLB and hash table. Here too, we assume
237 * support for any newer processor will mean a kernel that
238 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
241 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
246 /* Standard 4k base page size segment */
247 info
->sps
[0].page_shift
= 12;
248 info
->sps
[0].slb_enc
= 0;
249 info
->sps
[0].enc
[0].page_shift
= 12;
250 info
->sps
[0].enc
[0].pte_enc
= 0;
252 /* Standard 16M large page size segment */
253 info
->sps
[1].page_shift
= 24;
254 info
->sps
[1].slb_enc
= SLB_VSID_L
;
255 info
->sps
[1].enc
[0].page_shift
= 24;
256 info
->sps
[1].enc
[0].pte_enc
= 0;
260 /* HV KVM has backing store size restrictions */
261 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
263 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
264 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
267 if (env
->mmu_model
== POWERPC_MMU_2_06
||
268 env
->mmu_model
== POWERPC_MMU_2_07
) {
274 /* Standard 4k base page size segment */
275 info
->sps
[i
].page_shift
= 12;
276 info
->sps
[i
].slb_enc
= 0;
277 info
->sps
[i
].enc
[0].page_shift
= 12;
278 info
->sps
[i
].enc
[0].pte_enc
= 0;
281 /* 64K on MMU 2.06 and later */
282 if (env
->mmu_model
== POWERPC_MMU_2_06
||
283 env
->mmu_model
== POWERPC_MMU_2_07
) {
284 info
->sps
[i
].page_shift
= 16;
285 info
->sps
[i
].slb_enc
= 0x110;
286 info
->sps
[i
].enc
[0].page_shift
= 16;
287 info
->sps
[i
].enc
[0].pte_enc
= 1;
291 /* Standard 16M large page size segment */
292 info
->sps
[i
].page_shift
= 24;
293 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
294 info
->sps
[i
].enc
[0].page_shift
= 24;
295 info
->sps
[i
].enc
[0].pte_enc
= 0;
299 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
301 CPUState
*cs
= CPU(cpu
);
304 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
305 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
311 kvm_get_fallback_smmu_info(cpu
, info
);
314 static long gethugepagesize(const char *mem_path
)
320 ret
= statfs(mem_path
, &fs
);
321 } while (ret
!= 0 && errno
== EINTR
);
324 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
329 #define HUGETLBFS_MAGIC 0x958458f6
331 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
332 /* Explicit mempath, but it's ordinary pages */
333 return getpagesize();
336 /* It's hugepage, return the huge page size */
341 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
342 * may or may not name the same files / on the same filesystem now as
343 * when we actually open and map them. Iterate over the file
344 * descriptors instead, and use qemu_fd_getpagesize().
346 static int find_max_supported_pagesize(Object
*obj
, void *opaque
)
349 long *hpsize_min
= opaque
;
351 if (object_dynamic_cast(obj
, TYPE_MEMORY_BACKEND
)) {
352 mem_path
= object_property_get_str(obj
, "mem-path", NULL
);
354 long hpsize
= gethugepagesize(mem_path
);
355 if (hpsize
< *hpsize_min
) {
356 *hpsize_min
= hpsize
;
359 *hpsize_min
= getpagesize();
366 static long getrampagesize(void)
368 long hpsize
= LONG_MAX
;
372 return gethugepagesize(mem_path
);
375 /* it's possible we have memory-backend objects with
376 * hugepage-backed RAM. these may get mapped into system
377 * address space via -numa parameters or memory hotplug
378 * hooks. we want to take these into account, but we
379 * also want to make sure these supported hugepage
380 * sizes are applicable across the entire range of memory
381 * we may boot from, so we take the min across all
382 * backends, and assume normal pages in cases where a
383 * backend isn't backed by hugepages.
385 memdev_root
= object_resolve_path("/objects", NULL
);
387 return getpagesize();
390 object_child_foreach(memdev_root
, find_max_supported_pagesize
, &hpsize
);
392 if (hpsize
== LONG_MAX
) {
393 return getpagesize();
396 if (nb_numa_nodes
== 0 && hpsize
> getpagesize()) {
397 /* No NUMA nodes and normal RAM without -mem-path ==> no huge pages! */
400 error_report("Huge page support disabled (n/a for main memory).");
403 return getpagesize();
409 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
411 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
415 return (1ul << shift
) <= rampgsize
;
418 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
420 static struct kvm_ppc_smmu_info smmu_info
;
421 static bool has_smmu_info
;
422 CPUPPCState
*env
= &cpu
->env
;
426 /* We only handle page sizes for 64-bit server guests for now */
427 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
431 /* Collect MMU info from kernel if not already */
432 if (!has_smmu_info
) {
433 kvm_get_smmu_info(cpu
, &smmu_info
);
434 has_smmu_info
= true;
437 rampagesize
= getrampagesize();
439 /* Convert to QEMU form */
440 memset(&env
->sps
, 0, sizeof(env
->sps
));
442 /* If we have HV KVM, we need to forbid CI large pages if our
443 * host page size is smaller than 64K.
445 if (smmu_info
.flags
& KVM_PPC_PAGE_SIZES_REAL
) {
446 env
->ci_large_pages
= getpagesize() >= 0x10000;
450 * XXX This loop should be an entry wide AND of the capabilities that
451 * the selected CPU has with the capabilities that KVM supports.
453 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
454 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
455 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
457 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
461 qsps
->page_shift
= ksps
->page_shift
;
462 qsps
->slb_enc
= ksps
->slb_enc
;
463 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
464 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
465 ksps
->enc
[jk
].page_shift
)) {
468 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
469 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
470 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
474 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
478 env
->slb_nr
= smmu_info
.slb_size
;
479 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
480 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
483 #else /* defined (TARGET_PPC64) */
485 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
489 #endif /* !defined (TARGET_PPC64) */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
}
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}
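
/* Flush QEMU's software TLB shadow back to KVM: mark every entry dirty so
 * the kernel re-reads the shared TLB array. */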
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
591 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
593 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
594 CPUPPCState
*env
= &cpu
->env
;
599 struct kvm_one_reg reg
= {
601 .addr
= (uintptr_t) &val
,
605 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
607 trace_kvm_failed_spr_get(spr
, strerror(errno
));
609 switch (id
& KVM_REG_SIZE_MASK
) {
610 case KVM_REG_SIZE_U32
:
611 env
->spr
[spr
] = val
.u32
;
614 case KVM_REG_SIZE_U64
:
615 env
->spr
[spr
] = val
.u64
;
619 /* Don't handle this size yet */
625 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
627 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
628 CPUPPCState
*env
= &cpu
->env
;
633 struct kvm_one_reg reg
= {
635 .addr
= (uintptr_t) &val
,
639 switch (id
& KVM_REG_SIZE_MASK
) {
640 case KVM_REG_SIZE_U32
:
641 val
.u32
= env
->spr
[spr
];
644 case KVM_REG_SIZE_U64
:
645 val
.u64
= env
->spr
[spr
];
649 /* Don't handle this size yet */
653 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
655 trace_kvm_failed_spr_set(spr
, strerror(errno
));
659 static int kvm_put_fp(CPUState
*cs
)
661 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
662 CPUPPCState
*env
= &cpu
->env
;
663 struct kvm_one_reg reg
;
667 if (env
->insns_flags
& PPC_FLOAT
) {
668 uint64_t fpscr
= env
->fpscr
;
669 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
671 reg
.id
= KVM_REG_PPC_FPSCR
;
672 reg
.addr
= (uintptr_t)&fpscr
;
673 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
675 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
679 for (i
= 0; i
< 32; i
++) {
682 #ifdef HOST_WORDS_BIGENDIAN
683 vsr
[0] = float64_val(env
->fpr
[i
]);
684 vsr
[1] = env
->vsr
[i
];
686 vsr
[0] = env
->vsr
[i
];
687 vsr
[1] = float64_val(env
->fpr
[i
]);
689 reg
.addr
= (uintptr_t) &vsr
;
690 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
692 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
694 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
701 if (env
->insns_flags
& PPC_ALTIVEC
) {
702 reg
.id
= KVM_REG_PPC_VSCR
;
703 reg
.addr
= (uintptr_t)&env
->vscr
;
704 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
706 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
710 for (i
= 0; i
< 32; i
++) {
711 reg
.id
= KVM_REG_PPC_VR(i
);
712 reg
.addr
= (uintptr_t)&env
->avr
[i
];
713 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
715 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
724 static int kvm_get_fp(CPUState
*cs
)
726 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
727 CPUPPCState
*env
= &cpu
->env
;
728 struct kvm_one_reg reg
;
732 if (env
->insns_flags
& PPC_FLOAT
) {
734 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
736 reg
.id
= KVM_REG_PPC_FPSCR
;
737 reg
.addr
= (uintptr_t)&fpscr
;
738 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
740 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
746 for (i
= 0; i
< 32; i
++) {
749 reg
.addr
= (uintptr_t) &vsr
;
750 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
752 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
754 DPRINTF("Unable to get %s%d from KVM: %s\n",
755 vsx
? "VSR" : "FPR", i
, strerror(errno
));
758 #ifdef HOST_WORDS_BIGENDIAN
759 env
->fpr
[i
] = vsr
[0];
761 env
->vsr
[i
] = vsr
[1];
764 env
->fpr
[i
] = vsr
[1];
766 env
->vsr
[i
] = vsr
[0];
773 if (env
->insns_flags
& PPC_ALTIVEC
) {
774 reg
.id
= KVM_REG_PPC_VSCR
;
775 reg
.addr
= (uintptr_t)&env
->vscr
;
776 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
778 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
782 for (i
= 0; i
< 32; i
++) {
783 reg
.id
= KVM_REG_PPC_VR(i
);
784 reg
.addr
= (uintptr_t)&env
->avr
[i
];
785 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
787 DPRINTF("Unable to get VR%d from KVM: %s\n",
797 #if defined(TARGET_PPC64)
798 static int kvm_get_vpa(CPUState
*cs
)
800 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
801 CPUPPCState
*env
= &cpu
->env
;
802 struct kvm_one_reg reg
;
805 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
806 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
807 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
809 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
813 assert((uintptr_t)&env
->slb_shadow_size
814 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
815 reg
.id
= KVM_REG_PPC_VPA_SLB
;
816 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
817 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
819 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
824 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
825 reg
.id
= KVM_REG_PPC_VPA_DTL
;
826 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
827 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
829 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
837 static int kvm_put_vpa(CPUState
*cs
)
839 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
840 CPUPPCState
*env
= &cpu
->env
;
841 struct kvm_one_reg reg
;
844 /* SLB shadow or DTL can't be registered unless a master VPA is
845 * registered. That means when restoring state, if a VPA *is*
846 * registered, we need to set that up first. If not, we need to
847 * deregister the others before deregistering the master VPA */
848 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
851 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
852 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
853 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
855 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
860 assert((uintptr_t)&env
->slb_shadow_size
861 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
862 reg
.id
= KVM_REG_PPC_VPA_SLB
;
863 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
864 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
866 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
870 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
871 reg
.id
= KVM_REG_PPC_VPA_DTL
;
872 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
873 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
875 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
880 if (!env
->vpa_addr
) {
881 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
882 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
883 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
885 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
892 #endif /* TARGET_PPC64 */
894 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
896 CPUPPCState
*env
= &cpu
->env
;
897 struct kvm_sregs sregs
;
900 sregs
.pvr
= env
->spr
[SPR_PVR
];
902 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
906 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
907 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
908 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
909 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
911 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
916 for (i
= 0; i
< 16; i
++) {
917 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
921 for (i
= 0; i
< 8; i
++) {
922 /* Beware. We have to swap upper and lower bits here */
923 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
925 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
929 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
932 int kvm_arch_put_registers(CPUState
*cs
, int level
)
934 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
935 CPUPPCState
*env
= &cpu
->env
;
936 struct kvm_regs regs
;
940 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
947 regs
.xer
= cpu_read_xer(env
);
951 regs
.srr0
= env
->spr
[SPR_SRR0
];
952 regs
.srr1
= env
->spr
[SPR_SRR1
];
954 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
955 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
956 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
957 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
958 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
959 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
960 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
961 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
963 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
965 for (i
= 0;i
< 32; i
++)
966 regs
.gpr
[i
] = env
->gpr
[i
];
969 for (i
= 0; i
< 8; i
++) {
970 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
973 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
979 if (env
->tlb_dirty
) {
981 env
->tlb_dirty
= false;
984 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
985 ret
= kvmppc_put_books_sregs(cpu
);
991 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
992 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
998 /* We deliberately ignore errors here, for kernels which have
999 * the ONE_REG calls, but don't support the specific
1000 * registers, there's a reasonable chance things will still
1001 * work, at least until we try to migrate. */
1002 for (i
= 0; i
< 1024; i
++) {
1003 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1006 kvm_put_one_spr(cs
, id
, i
);
1012 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1013 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1015 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1016 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1018 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1019 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1020 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1021 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1022 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1023 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1024 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1025 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1026 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1027 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1031 if (kvm_put_vpa(cs
) < 0) {
1032 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1036 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1037 #endif /* TARGET_PPC64 */
1043 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1045 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1048 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1050 CPUPPCState
*env
= &cpu
->env
;
1051 struct kvm_sregs sregs
;
1054 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1059 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1060 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1061 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1062 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1063 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1064 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1065 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1066 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1067 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1068 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1069 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1070 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1073 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1074 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1075 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1076 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1077 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1078 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1081 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1082 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1085 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1086 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1089 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1090 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1091 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1092 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1093 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1094 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1095 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1096 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1097 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1098 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1099 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1100 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1101 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1102 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1103 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1104 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1105 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1106 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1107 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1108 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1109 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1110 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1111 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1112 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1113 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1114 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1115 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1116 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1117 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1118 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1119 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1120 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1121 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1123 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1124 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1125 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1126 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1127 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1128 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1129 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1132 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1133 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1134 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1137 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1138 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1139 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1140 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1141 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1145 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1146 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1147 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1148 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1149 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1150 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1151 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1152 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1153 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1154 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1155 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1158 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1159 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1162 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1163 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1164 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1167 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1168 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1169 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1170 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1172 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1173 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1174 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1181 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1183 CPUPPCState
*env
= &cpu
->env
;
1184 struct kvm_sregs sregs
;
1188 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1193 if (!env
->external_htab
) {
1194 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1200 * The packed SLB array we get from KVM_GET_SREGS only contains
1201 * information about valid entries. So we flush our internal copy
1202 * to get rid of stale ones, then put all valid SLB entries back
1205 memset(env
->slb
, 0, sizeof(env
->slb
));
1206 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1207 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1208 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1210 * Only restore valid entries
1212 if (rb
& SLB_ESID_V
) {
1213 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1219 for (i
= 0; i
< 16; i
++) {
1220 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1224 for (i
= 0; i
< 8; i
++) {
1225 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1226 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1227 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1228 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1234 int kvm_arch_get_registers(CPUState
*cs
)
1236 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1237 CPUPPCState
*env
= &cpu
->env
;
1238 struct kvm_regs regs
;
1242 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1247 for (i
= 7; i
>= 0; i
--) {
1248 env
->crf
[i
] = cr
& 15;
1252 env
->ctr
= regs
.ctr
;
1254 cpu_write_xer(env
, regs
.xer
);
1255 env
->msr
= regs
.msr
;
1258 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1259 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1261 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1262 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1263 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1264 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1265 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1266 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1267 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1268 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1270 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1272 for (i
= 0;i
< 32; i
++)
1273 env
->gpr
[i
] = regs
.gpr
[i
];
1277 if (cap_booke_sregs
) {
1278 ret
= kvmppc_get_booke_sregs(cpu
);
1285 ret
= kvmppc_get_books_sregs(cpu
);
1292 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1298 /* We deliberately ignore errors here, for kernels which have
1299 * the ONE_REG calls, but don't support the specific
1300 * registers, there's a reasonable chance things will still
1301 * work, at least until we try to migrate. */
1302 for (i
= 0; i
< 1024; i
++) {
1303 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1306 kvm_get_one_spr(cs
, id
, i
);
1312 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1313 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1315 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1316 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1318 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1319 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1320 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1321 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1322 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1323 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1324 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1325 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1326 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1327 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1331 if (kvm_get_vpa(cs
) < 0) {
1332 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1336 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1343 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1345 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1347 if (irq
!= PPC_INTERRUPT_EXT
) {
1351 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1355 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
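
/* kvm_arch_pre_run() below injects a pending external interrupt with the
 * KVM_INTERRUPT ioctl right before entering the guest (only needed when the
 * kernel lacks KVM_CAP_PPC_IRQ_LEVEL), and re-arms idle_timer as the
 * level-interrupt workaround described near the top of this file. */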
1368 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1370 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1371 CPUPPCState
*env
= &cpu
->env
;
1375 qemu_mutex_lock_iothread();
1377 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1378 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1379 if (!cap_interrupt_level
&&
1380 run
->ready_for_interrupt_injection
&&
1381 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1382 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1384 /* For now KVM disregards the 'irq' argument. However, in the
1385 * future KVM could cache it in-kernel to avoid a heavyweight exit
1386 * when reading the UIC.
1388 irq
= KVM_INTERRUPT_SET
;
1390 DPRINTF("injected interrupt %d\n", irq
);
1391 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1393 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1396 /* Always wake up soon in case the interrupt was level based */
1397 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1398 (NANOSECONDS_PER_SECOND
/ 50));
1401 /* We don't know if there are more interrupts pending after this. However,
1402 * the guest will return to userspace in the course of handling this one
1403 * anyways, so we will get a chance to deliver the rest. */
1405 qemu_mutex_unlock_iothread();
1408 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1410 return MEMTXATTRS_UNSPECIFIED
;
1413 int kvm_arch_process_async_events(CPUState
*cs
)
1418 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1420 CPUState
*cs
= CPU(cpu
);
1421 CPUPPCState
*env
= &cpu
->env
;
1423 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1425 cs
->exception_index
= EXCP_HLT
;
/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
1448 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1450 /* Mixed endian case is not handled */
1451 uint32_t sc
= debug_inst_opcode
;
1453 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1455 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1462 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1466 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1467 sc
!= debug_inst_opcode
||
1468 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1476 static int find_hw_breakpoint(target_ulong addr
, int type
)
1480 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1481 <= ARRAY_SIZE(hw_debug_points
));
1483 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1484 if (hw_debug_points
[n
].addr
== addr
&&
1485 hw_debug_points
[n
].type
== type
) {
1493 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1497 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1499 *flag
= BP_MEM_ACCESS
;
1503 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1505 *flag
= BP_MEM_WRITE
;
1509 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1511 *flag
= BP_MEM_READ
;
1518 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1519 target_ulong len
, int type
)
1521 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1525 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1526 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1529 case GDB_BREAKPOINT_HW
:
1530 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1534 if (find_hw_breakpoint(addr
, type
) >= 0) {
1541 case GDB_WATCHPOINT_WRITE
:
1542 case GDB_WATCHPOINT_READ
:
1543 case GDB_WATCHPOINT_ACCESS
:
1544 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1548 if (find_hw_breakpoint(addr
, type
) >= 0) {
1562 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1563 target_ulong len
, int type
)
1567 n
= find_hw_breakpoint(addr
, type
);
1573 case GDB_BREAKPOINT_HW
:
1577 case GDB_WATCHPOINT_WRITE
:
1578 case GDB_WATCHPOINT_READ
:
1579 case GDB_WATCHPOINT_ACCESS
:
1586 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1591 void kvm_arch_remove_all_hw_breakpoints(void)
1593 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1596 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1600 /* Software Breakpoint updates */
1601 if (kvm_sw_breakpoints_active(cs
)) {
1602 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1605 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1606 <= ARRAY_SIZE(hw_debug_points
));
1607 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1609 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1610 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1611 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1612 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1613 switch (hw_debug_points
[n
].type
) {
1614 case GDB_BREAKPOINT_HW
:
1615 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1617 case GDB_WATCHPOINT_WRITE
:
1618 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1620 case GDB_WATCHPOINT_READ
:
1621 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1623 case GDB_WATCHPOINT_ACCESS
:
1624 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1625 KVMPPC_DEBUG_WATCH_READ
;
1628 cpu_abort(cs
, "Unsupported breakpoint type\n");
1630 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1635 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1637 CPUState
*cs
= CPU(cpu
);
1638 CPUPPCState
*env
= &cpu
->env
;
1639 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1644 if (cs
->singlestep_enabled
) {
1646 } else if (arch_info
->status
) {
1647 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1648 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1649 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1653 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1654 KVMPPC_DEBUG_WATCH_WRITE
)) {
1655 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1658 cs
->watchpoint_hit
= &hw_watchpoint
;
1659 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1660 hw_watchpoint
.flags
= flag
;
1664 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1667 /* QEMU is not able to handle debug exception, so inject
1668 * program exception to guest;
1669 * Yes program exception NOT debug exception !!
1670 * When QEMU is using debug resources then debug exception must
1671 * be always set. To achieve this we set MSR_DE and also set
1672 * MSRP_DEP so guest cannot change MSR_DE.
1673 * When emulating debug resource for guest we want guest
1674 * to control MSR_DE (enable/disable debug interrupt on need).
1675 * Supporting both configurations are NOT possible.
1676 * So the result is that we cannot share debug resources
1677 * between QEMU and Guest on BOOKE architecture.
1678 * In the current design QEMU gets the priority over guest,
1679 * this means that if QEMU is using debug resources then guest
1681 * For software breakpoint QEMU uses a privileged instruction;
1682 * So there cannot be any reason that we are here for guest
1683 * set debug exception, only possibility is guest executed a
1684 * privileged / illegal instruction and that's why we are
1685 * injecting a program interrupt.
1688 cpu_synchronize_state(cs
);
1689 /* env->nip is PC, so increment this by 4 to use
1690 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1693 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1694 env
->error_code
= POWERPC_EXCP_INVAL
;
1695 ppc_cpu_do_interrupt(cs
);
1701 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1703 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1704 CPUPPCState
*env
= &cpu
->env
;
1707 qemu_mutex_lock_iothread();
1709 switch (run
->exit_reason
) {
1711 if (run
->dcr
.is_write
) {
1712 DPRINTF("handle dcr write\n");
1713 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1715 DPRINTF("handle dcr read\n");
1716 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1720 DPRINTF("handle halt\n");
1721 ret
= kvmppc_handle_halt(cpu
);
1723 #if defined(TARGET_PPC64)
1724 case KVM_EXIT_PAPR_HCALL
:
1725 DPRINTF("handle PAPR hypercall\n");
1726 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1728 run
->papr_hcall
.args
);
1733 DPRINTF("handle epr\n");
1734 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1737 case KVM_EXIT_WATCHDOG
:
1738 DPRINTF("handle watchdog expiry\n");
1739 watchdog_perform_action();
1743 case KVM_EXIT_DEBUG
:
1744 DPRINTF("handle debug exception\n");
1745 if (kvm_handle_debug(cpu
, run
)) {
1749 /* re-enter, this exception was guest-internal */
1754 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1759 qemu_mutex_unlock_iothread();
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}
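
/* Enable the in-kernel BookE watchdog (KVM_CAP_PPC_BOOKE_WATCHDOG) for this
 * vCPU, if the host KVM supports it. */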
1802 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1804 CPUState
*cs
= CPU(cpu
);
1807 if (!kvm_enabled()) {
1811 if (!cap_ppc_watchdog
) {
1812 printf("warning: KVM does not support watchdog");
1816 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1818 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1819 __func__
, strerror(-ret
));
1826 static int read_cpuinfo(const char *field
, char *value
, int len
)
1830 int field_len
= strlen(field
);
1833 f
= fopen("/proc/cpuinfo", "r");
1839 if (!fgets(line
, sizeof(line
), f
)) {
1842 if (!strncmp(line
, field
, field_len
)) {
1843 pstrcpy(value
, len
, line
);
1854 uint32_t kvmppc_get_tbfreq(void)
1858 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1860 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1864 if (!(ns
= strchr(line
, ':'))) {
1873 bool kvmppc_get_host_serial(char **value
)
1875 return g_file_get_contents("/proc/device-tree/system-id", value
, NULL
,
1879 bool kvmppc_get_host_model(char **value
)
1881 return g_file_get_contents("/proc/device-tree/model", value
, NULL
, NULL
);
1884 /* Try to find a device tree node for a CPU with clock-frequency property */
1885 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1887 struct dirent
*dirp
;
1890 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1891 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1896 while ((dirp
= readdir(dp
)) != NULL
) {
1898 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1900 f
= fopen(buf
, "r");
1902 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1909 if (buf
[0] == '\0') {
1910 printf("Unknown host!\n");
1917 static uint64_t kvmppc_read_int_dt(const char *filename
)
1926 f
= fopen(filename
, "rb");
1931 len
= fread(&u
, 1, sizeof(u
), f
);
1935 /* property is a 32-bit quantity */
1936 return be32_to_cpu(u
.v32
);
1938 return be64_to_cpu(u
.v64
);
1944 /* Read a CPU node property from the host device tree that's a single
1945 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1946 * (can't find or open the property, or doesn't understand the
1948 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1950 char buf
[PATH_MAX
], *tmp
;
1953 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1957 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1958 val
= kvmppc_read_int_dt(tmp
);
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1979 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1981 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1982 CPUState
*cs
= CPU(cpu
);
1984 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1985 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1992 int kvmppc_get_hasidle(CPUPPCState
*env
)
1994 struct kvm_ppc_pvinfo pvinfo
;
1996 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1997 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
2004 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
2006 uint32_t *hc
= (uint32_t*)buf
;
2007 struct kvm_ppc_pvinfo pvinfo
;
2009 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
2010 memcpy(buf
, pvinfo
.hcall
, buf_len
);
2015 * Fallback to always fail hypercalls regardless of endianness:
2017 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2019 * b .+8 (becomes nop in wrong endian)
2020 * bswap32(li r3, -1)
2023 hc
[0] = cpu_to_be32(0x08000048);
2024 hc
[1] = cpu_to_be32(0x3860ffff);
2025 hc
[2] = cpu_to_be32(0x48000008);
2026 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}

void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}
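
/* Switch this vCPU into PAPR (pseries) mode via KVM_CAP_PPC_PAPR and record
 * it in cap_papr so the sregs synchronisation takes the right path. */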
2053 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2055 CPUState
*cs
= CPU(cpu
);
2058 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2060 error_report("This vCPU type or KVM version does not support PAPR");
2064 /* Update the capability flag so we sync the right information
2069 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
2071 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
2074 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2076 CPUState
*cs
= CPU(cpu
);
2079 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2080 if (ret
&& mpic_proxy
) {
2081 error_report("This KVM version does not support EPR");
2086 int kvmppc_smt_threads(void)
2088 return cap_ppc_smt
? cap_ppc_smt
: 1;
2092 off_t
kvmppc_alloc_rma(void **rma
)
2096 struct kvm_allocate_rma ret
;
2098 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2099 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2100 * not necessary on this hardware
2101 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2103 * FIXME: We should allow the user to force contiguous RMA
2104 * allocation in the cap_ppc_rma==1 case.
2106 if (cap_ppc_rma
< 2) {
2110 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2112 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2117 size
= MIN(ret
.rma_size
, 256ul << 20);
2119 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2120 if (*rma
== MAP_FAILED
) {
2121 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2128 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2130 struct kvm_ppc_smmu_info info
;
2131 long rampagesize
, best_page_shift
;
2134 if (cap_ppc_rma
>= 2) {
2135 return current_size
;
2138 /* Find the largest hardware supported page size that's less than
2139 * or equal to the (logical) backing page size of guest RAM */
2140 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2141 rampagesize
= getrampagesize();
2142 best_page_shift
= 0;
2144 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2145 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2147 if (!sps
->page_shift
) {
2151 if ((sps
->page_shift
> best_page_shift
)
2152 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2153 best_page_shift
= sps
->page_shift
;
2157 return MIN(current_size
,
2158 1ULL << (best_page_shift
+ hash_shift
- 7));
2162 bool kvmppc_spapr_use_multitce(void)
2164 return cap_spapr_multitce
;
2167 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2170 struct kvm_create_spapr_tce args
= {
2172 .window_size
= window_size
,
2178 /* Must set fd to -1 so we don't try to munmap when called for
2179 * destroying the table, which the upper layers -will- do
2182 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2186 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2188 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2193 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2194 /* FIXME: round this up to page size */
2196 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2197 if (table
== MAP_FAILED
) {
2198 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2208 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2216 len
= nb_table
* sizeof(uint64_t);
2217 if ((munmap(table
, len
) < 0) ||
2219 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2221 /* Leak the table */
2227 int kvmppc_reset_htab(int shift_hint
)
2229 uint32_t shift
= shift_hint
;
2231 if (!kvm_enabled()) {
2232 /* Full emulation, tell caller to allocate htab itself */
2235 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2237 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2238 if (ret
== -ENOTTY
) {
2239 /* At least some versions of PR KVM advertise the
2240 * capability, but don't implement the ioctl(). Oops.
2241 * Return 0 so that we allocate the htab in qemu, as is
2242 * correct for PR. */
2244 } else if (ret
< 0) {
2250 /* We have a kernel that predates the htab reset calls. For PR
2251 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2252 * this era, it has allocated a 16MB fixed size hash table
2253 * already. Kernels of this era have the GET_PVINFO capability
2254 * only on PR, so we use this hack to determine the right
2256 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
2257 /* PR - tell caller to allocate htab */
2260 /* HV - assume 16MB kernel allocated htab */
2265 static inline uint32_t mfpvr(void)
2274 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2283 static void kvmppc_host_cpu_initfn(Object
*obj
)
2285 assert(kvm_enabled());
2288 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2290 DeviceClass
*dc
= DEVICE_CLASS(oc
);
2291 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2292 uint32_t vmx
= kvmppc_get_vmx();
2293 uint32_t dfp
= kvmppc_get_dfp();
2294 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2295 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2297 /* Now fix up the class with information we can query from the host */
2301 /* Only override when we know what the host supports */
2302 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2303 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2306 /* Only override when we know what the host supports */
2307 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2310 if (dcache_size
!= -1) {
2311 pcc
->l1_dcache_size
= dcache_size
;
2314 if (icache_size
!= -1) {
2315 pcc
->l1_icache_size
= icache_size
;
2318 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2319 dc
->cannot_destroy_with_object_finalize_yet
= true;
bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}
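
/* Walk up the QOM class hierarchy to the first abstract ancestor, i.e. the
 * generic family class for this CPU model. */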
2337 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2339 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2341 while (oc
&& !object_class_is_abstract(oc
)) {
2342 oc
= object_class_get_parent(oc
);
2346 return POWERPC_CPU_CLASS(oc
);
2349 PowerPCCPUClass
*kvm_ppc_get_host_cpu_class(void)
2351 uint32_t host_pvr
= mfpvr();
2352 PowerPCCPUClass
*pvr_pcc
;
2354 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2355 if (pvr_pcc
== NULL
) {
2356 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2362 #if defined(TARGET_PPC64)
2363 static void spapr_cpu_core_host_initfn(Object
*obj
)
2365 sPAPRCPUCore
*core
= SPAPR_CPU_CORE(obj
);
2366 char *name
= g_strdup_printf("%s-" TYPE_POWERPC_CPU
, "host");
2367 ObjectClass
*oc
= object_class_by_name(name
);
2370 g_free((void *)name
);
2371 core
->cpu_class
= oc
;
2375 static int kvm_ppc_register_host_cpu_type(void)
2377 TypeInfo type_info
= {
2378 .name
= TYPE_HOST_POWERPC_CPU
,
2379 .instance_init
= kvmppc_host_cpu_initfn
,
2380 .class_init
= kvmppc_host_cpu_class_init
,
2382 PowerPCCPUClass
*pvr_pcc
;
2385 pvr_pcc
= kvm_ppc_get_host_cpu_class();
2386 if (pvr_pcc
== NULL
) {
2389 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2390 type_register(&type_info
);
2392 #if defined(TARGET_PPC64)
2393 type_info
.name
= g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE
, "host");
2394 type_info
.parent
= TYPE_SPAPR_CPU_CORE
,
2395 type_info
.instance_size
= sizeof(sPAPRCPUCore
),
2396 type_info
.instance_init
= spapr_cpu_core_host_initfn
,
2397 type_info
.class_init
= NULL
;
2398 type_register(&type_info
);
2399 g_free((void *)type_info
.name
);
2400 type_info
.instance_size
= 0;
2401 type_info
.instance_init
= NULL
;
2404 /* Register generic family CPU class for a family */
2405 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2406 dc
= DEVICE_CLASS(pvr_pcc
);
2407 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2408 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2409 type_register(&type_info
);
2414 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2416 struct kvm_rtas_token_args args
= {
2420 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2424 strncpy(args
.name
, function
, sizeof(args
.name
));
2426 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2429 int kvmppc_get_htab_fd(bool write
)
2431 struct kvm_get_htab_fd s
= {
2432 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2437 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2441 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2444 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2446 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2447 uint8_t buf
[bufsize
];
2451 rc
= read(fd
, buf
, bufsize
);
2453 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2457 uint8_t *buffer
= buf
;
2460 struct kvm_get_htab_header
*head
=
2461 (struct kvm_get_htab_header
*) buffer
;
2462 size_t chunksize
= sizeof(*head
) +
2463 HASH_PTE_SIZE_64
* head
->n_valid
;
2465 qemu_put_be32(f
, head
->index
);
2466 qemu_put_be16(f
, head
->n_valid
);
2467 qemu_put_be16(f
, head
->n_invalid
);
2468 qemu_put_buffer(f
, (void *)(head
+ 1),
2469 HASH_PTE_SIZE_64
* head
->n_valid
);
2471 buffer
+= chunksize
;
2477 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2479 return (rc
== 0) ? 1 : 0;
2482 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2483 uint16_t n_valid
, uint16_t n_invalid
)
2485 struct kvm_get_htab_header
*buf
;
2486 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2489 buf
= alloca(chunksize
);
2491 buf
->n_valid
= n_valid
;
2492 buf
->n_invalid
= n_invalid
;
2494 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2496 rc
= write(fd
, buf
, chunksize
);
2498 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2502 if (rc
!= chunksize
) {
2503 /* We should never get a short write on a single chunk */
2504 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2510 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2515 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2520 int kvm_arch_on_sigbus(int code
, void *addr
)
2525 void kvm_arch_init_irq_routing(KVMState
*s
)
2529 struct kvm_get_htab_buf
{
2530 struct kvm_get_htab_header header
;
2532 * We require one extra byte for read
2534 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2537 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2540 struct kvm_get_htab_fd ghf
;
2541 struct kvm_get_htab_buf
*hpte_buf
;
2544 ghf
.start_index
= pte_index
;
2545 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2550 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2552 * Read the hpte group
2554 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2559 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2568 void kvmppc_hash64_free_pteg(uint64_t token
)
2570 struct kvm_get_htab_buf
*htab_buf
;
2572 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2578 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2579 target_ulong pte0
, target_ulong pte1
)
2582 struct kvm_get_htab_fd ghf
;
2583 struct kvm_get_htab_buf hpte_buf
;
2586 ghf
.start_index
= 0; /* Ignored */
2587 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2592 hpte_buf
.header
.n_valid
= 1;
2593 hpte_buf
.header
.n_invalid
= 0;
2594 hpte_buf
.header
.index
= pte_index
;
2595 hpte_buf
.hpte
[0] = pte0
;
2596 hpte_buf
.hpte
[1] = pte1
;
2598 * Write the hpte entry.
2599 * CAUTION: write() has the warn_unused_result attribute. Hence we
2600 * need to check the return value, even though we do nothing.
2602 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2614 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2615 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
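
/* Route the H_RANDOM hypercall to the in-kernel hardware RNG backend when
 * KVM advertises KVM_CAP_PPC_HWRNG. */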
int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}