2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
45 #include "qemu/cutils.h"
/* Debug logging helper: enable by defining DEBUG_KVM.  The extracted text
 * defined DPRINTF twice with no conditional guard (a redefinition error);
 * restore the standard QEMU #ifdef pattern. */
/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
57 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
59 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
/* Cached results of KVM capability probes, filled in by kvm_arch_init().
 * cap_hior, cap_epr and cap_papr were referenced later in this file
 * (kvm_arch_init assigns cap_hior/cap_epr; the cap_papr comment mentions
 * kvmppc_set_papr()) but were missing from the declaration list — restore
 * them so the file links. */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Opcode KVM uses for software breakpoints (fetched in kvm_arch_init_vcpu) */
static uint32_t debug_inst_opcode;
82 /* XXX We have a race condition where we actually have a level triggered
83 * interrupt, but the infrastructure can't expose that yet, so the guest
84 * takes but ignores it, goes to sleep and never gets notified that there's
85 * still an interrupt pending.
87 * As a quick workaround, let's just wake up again 20 ms after we injected
88 * an interrupt. That way we can assure that we're always reinjecting
89 * interrupts in case the guest swallowed them.
91 static QEMUTimer
*idle_timer
;
93 static void kvm_kick_cpu(void *opaque
)
95 PowerPCCPU
*cpu
= opaque
;
97 qemu_cpu_kick(CPU(cpu
));
100 static int kvm_ppc_register_host_cpu_type(void);
102 int kvm_arch_init(MachineState
*ms
, KVMState
*s
)
104 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
105 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
106 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
107 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
108 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
109 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
110 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
111 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
112 cap_spapr_vfio
= false;
113 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
114 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
115 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
116 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
117 /* Note: we don't set cap_papr here, because this capability is
118 * only activated after this by kvmppc_set_papr() */
119 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
120 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
122 if (!cap_interrupt_level
) {
123 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
124 "VM to stall at times!\n");
127 kvm_ppc_register_host_cpu_type();
132 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
134 CPUPPCState
*cenv
= &cpu
->env
;
135 CPUState
*cs
= CPU(cpu
);
136 struct kvm_sregs sregs
;
139 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
140 /* What we're really trying to say is "if we're on BookE, we use
141 the native PVR for now". This is the only sane way to check
142 it though, so we potentially confuse users that they can run
143 BookE guests on BookS. Let's hope nobody dares enough :) */
147 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
152 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
157 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
158 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
161 /* Set up a shared TLB array with KVM */
162 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
164 CPUPPCState
*env
= &cpu
->env
;
165 CPUState
*cs
= CPU(cpu
);
166 struct kvm_book3e_206_tlb_params params
= {};
167 struct kvm_config_tlb cfg
= {};
168 unsigned int entries
= 0;
171 if (!kvm_enabled() ||
172 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
176 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
178 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
179 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
180 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
181 entries
+= params
.tlb_sizes
[i
];
184 assert(entries
== env
->nb_tlb
);
185 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
187 env
->tlb_dirty
= true;
189 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
190 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
191 cfg
.params
= (uintptr_t)¶ms
;
192 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
194 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
196 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
197 __func__
, strerror(-ret
));
201 env
->kvm_sw_tlb
= true;
206 #if defined(TARGET_PPC64)
207 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
208 struct kvm_ppc_smmu_info
*info
)
210 CPUPPCState
*env
= &cpu
->env
;
211 CPUState
*cs
= CPU(cpu
);
213 memset(info
, 0, sizeof(*info
));
215 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
216 * need to "guess" what the supported page sizes are.
218 * For that to work we make a few assumptions:
220 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
221 * KVM which only supports 4K and 16M pages, but supports them
222 * regardless of the backing store characteritics. We also don't
223 * support 1T segments.
225 * This is safe as if HV KVM ever supports that capability or PR
226 * KVM grows supports for more page/segment sizes, those versions
227 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
228 * will not hit this fallback
230 * - Else we are running HV KVM. This means we only support page
231 * sizes that fit in the backing store. Additionally we only
232 * advertize 64K pages if the processor is ARCH 2.06 and we assume
233 * P7 encodings for the SLB and hash table. Here too, we assume
234 * support for any newer processor will mean a kernel that
235 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
238 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
243 /* Standard 4k base page size segment */
244 info
->sps
[0].page_shift
= 12;
245 info
->sps
[0].slb_enc
= 0;
246 info
->sps
[0].enc
[0].page_shift
= 12;
247 info
->sps
[0].enc
[0].pte_enc
= 0;
249 /* Standard 16M large page size segment */
250 info
->sps
[1].page_shift
= 24;
251 info
->sps
[1].slb_enc
= SLB_VSID_L
;
252 info
->sps
[1].enc
[0].page_shift
= 24;
253 info
->sps
[1].enc
[0].pte_enc
= 0;
257 /* HV KVM has backing store size restrictions */
258 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
260 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
261 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
264 if (env
->mmu_model
== POWERPC_MMU_2_06
||
265 env
->mmu_model
== POWERPC_MMU_2_07
) {
271 /* Standard 4k base page size segment */
272 info
->sps
[i
].page_shift
= 12;
273 info
->sps
[i
].slb_enc
= 0;
274 info
->sps
[i
].enc
[0].page_shift
= 12;
275 info
->sps
[i
].enc
[0].pte_enc
= 0;
278 /* 64K on MMU 2.06 and later */
279 if (env
->mmu_model
== POWERPC_MMU_2_06
||
280 env
->mmu_model
== POWERPC_MMU_2_07
) {
281 info
->sps
[i
].page_shift
= 16;
282 info
->sps
[i
].slb_enc
= 0x110;
283 info
->sps
[i
].enc
[0].page_shift
= 16;
284 info
->sps
[i
].enc
[0].pte_enc
= 1;
288 /* Standard 16M large page size segment */
289 info
->sps
[i
].page_shift
= 24;
290 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
291 info
->sps
[i
].enc
[0].page_shift
= 24;
292 info
->sps
[i
].enc
[0].pte_enc
= 0;
296 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
298 CPUState
*cs
= CPU(cpu
);
301 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
302 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
308 kvm_get_fallback_smmu_info(cpu
, info
);
/* Return the page size backing @mem_path: the hugepage size if the path is
 * on hugetlbfs, the normal host page size otherwise.  Exits on statfs()
 * failure (retrying on EINTR first). */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
338 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
339 * may or may not name the same files / on the same filesystem now as
340 * when we actually open and map them. Iterate over the file
341 * descriptors instead, and use qemu_fd_getpagesize().
343 static int find_max_supported_pagesize(Object
*obj
, void *opaque
)
346 long *hpsize_min
= opaque
;
348 if (object_dynamic_cast(obj
, TYPE_MEMORY_BACKEND
)) {
349 mem_path
= object_property_get_str(obj
, "mem-path", NULL
);
351 long hpsize
= gethugepagesize(mem_path
);
352 if (hpsize
< *hpsize_min
) {
353 *hpsize_min
= hpsize
;
356 *hpsize_min
= getpagesize();
363 static long getrampagesize(void)
365 long hpsize
= LONG_MAX
;
369 return gethugepagesize(mem_path
);
372 /* it's possible we have memory-backend objects with
373 * hugepage-backed RAM. these may get mapped into system
374 * address space via -numa parameters or memory hotplug
375 * hooks. we want to take these into account, but we
376 * also want to make sure these supported hugepage
377 * sizes are applicable across the entire range of memory
378 * we may boot from, so we take the min across all
379 * backends, and assume normal pages in cases where a
380 * backend isn't backed by hugepages.
382 memdev_root
= object_resolve_path("/objects", NULL
);
384 return getpagesize();
387 object_child_foreach(memdev_root
, find_max_supported_pagesize
, &hpsize
);
389 return (hpsize
== LONG_MAX
) ? getpagesize() : hpsize
;
392 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
394 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
398 return (1ul << shift
) <= rampgsize
;
401 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
403 static struct kvm_ppc_smmu_info smmu_info
;
404 static bool has_smmu_info
;
405 CPUPPCState
*env
= &cpu
->env
;
409 /* We only handle page sizes for 64-bit server guests for now */
410 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
414 /* Collect MMU info from kernel if not already */
415 if (!has_smmu_info
) {
416 kvm_get_smmu_info(cpu
, &smmu_info
);
417 has_smmu_info
= true;
420 rampagesize
= getrampagesize();
422 /* Convert to QEMU form */
423 memset(&env
->sps
, 0, sizeof(env
->sps
));
425 /* If we have HV KVM, we need to forbid CI large pages if our
426 * host page size is smaller than 64K.
428 if (smmu_info
.flags
& KVM_PPC_PAGE_SIZES_REAL
) {
429 env
->ci_large_pages
= getpagesize() >= 0x10000;
433 * XXX This loop should be an entry wide AND of the capabilities that
434 * the selected CPU has with the capabilities that KVM supports.
436 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
437 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
438 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
440 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
444 qsps
->page_shift
= ksps
->page_shift
;
445 qsps
->slb_enc
= ksps
->slb_enc
;
446 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
447 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
448 ksps
->enc
[jk
].page_shift
)) {
451 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
452 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
453 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
457 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
461 env
->slb_nr
= smmu_info
.slb_size
;
462 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
463 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
466 #else /* defined (TARGET_PPC64) */
468 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
472 #endif /* !defined (TARGET_PPC64) */
474 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
476 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
479 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
480 * book3s supports only 1 watchpoint, so array size
481 * of 4 is sufficient for now.
483 #define MAX_HW_BKPTS 4
485 static struct HWBreakpoint
{
488 } hw_debug_points
[MAX_HW_BKPTS
];
490 static CPUWatchpoint hw_watchpoint
;
492 /* Default there is no breakpoint and watchpoint supported */
493 static int max_hw_breakpoint
;
494 static int max_hw_watchpoint
;
495 static int nb_hw_breakpoint
;
496 static int nb_hw_watchpoint
;
498 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
500 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
501 max_hw_breakpoint
= 2;
502 max_hw_watchpoint
= 2;
505 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
506 fprintf(stderr
, "Error initializing h/w breakpoints\n");
511 int kvm_arch_init_vcpu(CPUState
*cs
)
513 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
514 CPUPPCState
*cenv
= &cpu
->env
;
517 /* Gather server mmu info from KVM and update the CPU state */
518 kvm_fixup_page_sizes(cpu
);
520 /* Synchronize sregs with kvm */
521 ret
= kvm_arch_sync_sregs(cpu
);
523 if (ret
== -EINVAL
) {
524 error_report("Register sync failed... If you're using kvm-hv.ko,"
525 " only \"-cpu host\" is possible");
530 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
532 /* Some targets support access to KVM's guest TLB. */
533 switch (cenv
->mmu_model
) {
534 case POWERPC_MMU_BOOKE206
:
535 ret
= kvm_booke206_tlb_init(cpu
);
541 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
542 kvmppc_hw_debug_points_init(cenv
);
547 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
549 CPUPPCState
*env
= &cpu
->env
;
550 CPUState
*cs
= CPU(cpu
);
551 struct kvm_dirty_tlb dirty_tlb
;
552 unsigned char *bitmap
;
555 if (!env
->kvm_sw_tlb
) {
559 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
560 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
562 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
563 dirty_tlb
.num_dirty
= env
->nb_tlb
;
565 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
567 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
568 __func__
, strerror(-ret
));
574 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
576 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
577 CPUPPCState
*env
= &cpu
->env
;
582 struct kvm_one_reg reg
= {
584 .addr
= (uintptr_t) &val
,
588 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
590 trace_kvm_failed_spr_get(spr
, strerror(errno
));
592 switch (id
& KVM_REG_SIZE_MASK
) {
593 case KVM_REG_SIZE_U32
:
594 env
->spr
[spr
] = val
.u32
;
597 case KVM_REG_SIZE_U64
:
598 env
->spr
[spr
] = val
.u64
;
602 /* Don't handle this size yet */
608 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
610 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
611 CPUPPCState
*env
= &cpu
->env
;
616 struct kvm_one_reg reg
= {
618 .addr
= (uintptr_t) &val
,
622 switch (id
& KVM_REG_SIZE_MASK
) {
623 case KVM_REG_SIZE_U32
:
624 val
.u32
= env
->spr
[spr
];
627 case KVM_REG_SIZE_U64
:
628 val
.u64
= env
->spr
[spr
];
632 /* Don't handle this size yet */
636 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
638 trace_kvm_failed_spr_set(spr
, strerror(errno
));
642 static int kvm_put_fp(CPUState
*cs
)
644 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
645 CPUPPCState
*env
= &cpu
->env
;
646 struct kvm_one_reg reg
;
650 if (env
->insns_flags
& PPC_FLOAT
) {
651 uint64_t fpscr
= env
->fpscr
;
652 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
654 reg
.id
= KVM_REG_PPC_FPSCR
;
655 reg
.addr
= (uintptr_t)&fpscr
;
656 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
658 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
662 for (i
= 0; i
< 32; i
++) {
665 #ifdef HOST_WORDS_BIGENDIAN
666 vsr
[0] = float64_val(env
->fpr
[i
]);
667 vsr
[1] = env
->vsr
[i
];
669 vsr
[0] = env
->vsr
[i
];
670 vsr
[1] = float64_val(env
->fpr
[i
]);
672 reg
.addr
= (uintptr_t) &vsr
;
673 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
675 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
677 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
684 if (env
->insns_flags
& PPC_ALTIVEC
) {
685 reg
.id
= KVM_REG_PPC_VSCR
;
686 reg
.addr
= (uintptr_t)&env
->vscr
;
687 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
689 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
693 for (i
= 0; i
< 32; i
++) {
694 reg
.id
= KVM_REG_PPC_VR(i
);
695 reg
.addr
= (uintptr_t)&env
->avr
[i
];
696 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
698 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
707 static int kvm_get_fp(CPUState
*cs
)
709 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
710 CPUPPCState
*env
= &cpu
->env
;
711 struct kvm_one_reg reg
;
715 if (env
->insns_flags
& PPC_FLOAT
) {
717 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
719 reg
.id
= KVM_REG_PPC_FPSCR
;
720 reg
.addr
= (uintptr_t)&fpscr
;
721 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
723 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
729 for (i
= 0; i
< 32; i
++) {
732 reg
.addr
= (uintptr_t) &vsr
;
733 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
735 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
737 DPRINTF("Unable to get %s%d from KVM: %s\n",
738 vsx
? "VSR" : "FPR", i
, strerror(errno
));
741 #ifdef HOST_WORDS_BIGENDIAN
742 env
->fpr
[i
] = vsr
[0];
744 env
->vsr
[i
] = vsr
[1];
747 env
->fpr
[i
] = vsr
[1];
749 env
->vsr
[i
] = vsr
[0];
756 if (env
->insns_flags
& PPC_ALTIVEC
) {
757 reg
.id
= KVM_REG_PPC_VSCR
;
758 reg
.addr
= (uintptr_t)&env
->vscr
;
759 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
761 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
765 for (i
= 0; i
< 32; i
++) {
766 reg
.id
= KVM_REG_PPC_VR(i
);
767 reg
.addr
= (uintptr_t)&env
->avr
[i
];
768 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
770 DPRINTF("Unable to get VR%d from KVM: %s\n",
780 #if defined(TARGET_PPC64)
781 static int kvm_get_vpa(CPUState
*cs
)
783 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
784 CPUPPCState
*env
= &cpu
->env
;
785 struct kvm_one_reg reg
;
788 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
789 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
790 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
792 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
796 assert((uintptr_t)&env
->slb_shadow_size
797 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
798 reg
.id
= KVM_REG_PPC_VPA_SLB
;
799 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
800 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
802 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
807 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
808 reg
.id
= KVM_REG_PPC_VPA_DTL
;
809 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
810 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
812 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
820 static int kvm_put_vpa(CPUState
*cs
)
822 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
823 CPUPPCState
*env
= &cpu
->env
;
824 struct kvm_one_reg reg
;
827 /* SLB shadow or DTL can't be registered unless a master VPA is
828 * registered. That means when restoring state, if a VPA *is*
829 * registered, we need to set that up first. If not, we need to
830 * deregister the others before deregistering the master VPA */
831 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
834 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
835 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
836 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
838 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
843 assert((uintptr_t)&env
->slb_shadow_size
844 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
845 reg
.id
= KVM_REG_PPC_VPA_SLB
;
846 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
847 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
849 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
853 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
854 reg
.id
= KVM_REG_PPC_VPA_DTL
;
855 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
856 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
858 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
863 if (!env
->vpa_addr
) {
864 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
865 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
866 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
868 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
875 #endif /* TARGET_PPC64 */
877 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
879 CPUPPCState
*env
= &cpu
->env
;
880 struct kvm_sregs sregs
;
883 sregs
.pvr
= env
->spr
[SPR_PVR
];
885 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
889 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
890 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
891 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
892 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
894 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
899 for (i
= 0; i
< 16; i
++) {
900 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
904 for (i
= 0; i
< 8; i
++) {
905 /* Beware. We have to swap upper and lower bits here */
906 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
908 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
912 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
915 int kvm_arch_put_registers(CPUState
*cs
, int level
)
917 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
918 CPUPPCState
*env
= &cpu
->env
;
919 struct kvm_regs regs
;
923 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
930 regs
.xer
= cpu_read_xer(env
);
934 regs
.srr0
= env
->spr
[SPR_SRR0
];
935 regs
.srr1
= env
->spr
[SPR_SRR1
];
937 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
938 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
939 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
940 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
941 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
942 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
943 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
944 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
946 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
948 for (i
= 0;i
< 32; i
++)
949 regs
.gpr
[i
] = env
->gpr
[i
];
952 for (i
= 0; i
< 8; i
++) {
953 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
956 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
962 if (env
->tlb_dirty
) {
964 env
->tlb_dirty
= false;
967 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
968 ret
= kvmppc_put_books_sregs(cpu
);
974 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
975 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
981 /* We deliberately ignore errors here, for kernels which have
982 * the ONE_REG calls, but don't support the specific
983 * registers, there's a reasonable chance things will still
984 * work, at least until we try to migrate. */
985 for (i
= 0; i
< 1024; i
++) {
986 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
989 kvm_put_one_spr(cs
, id
, i
);
995 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
996 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
998 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
999 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1001 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1002 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1003 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1004 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1005 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1006 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1007 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1008 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1009 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1010 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1014 if (kvm_put_vpa(cs
) < 0) {
1015 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1019 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1020 #endif /* TARGET_PPC64 */
1026 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1028 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1031 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1033 CPUPPCState
*env
= &cpu
->env
;
1034 struct kvm_sregs sregs
;
1037 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1042 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1043 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1044 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1045 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1046 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1047 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1048 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1049 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1050 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1051 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1052 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1053 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1056 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1057 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1058 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1059 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1060 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1061 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1064 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1065 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1068 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1069 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1072 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1073 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1074 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1075 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1076 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1077 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1078 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1079 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1080 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1081 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1082 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1083 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1084 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1085 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1086 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1087 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1088 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1089 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1090 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1091 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1092 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1093 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1094 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1095 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1096 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1097 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1098 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1099 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1100 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1101 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1102 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1103 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1104 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1106 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1107 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1108 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1109 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1110 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1111 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1112 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1115 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1116 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1117 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1120 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1121 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1122 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1123 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1124 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1128 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1129 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1130 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1131 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1132 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1133 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1134 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1135 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1136 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1137 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1138 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1141 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1142 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1145 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1146 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1147 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1150 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1151 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1152 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1153 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1155 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1156 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1157 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1164 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1166 CPUPPCState
*env
= &cpu
->env
;
1167 struct kvm_sregs sregs
;
1171 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1176 if (!env
->external_htab
) {
1177 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1183 * The packed SLB array we get from KVM_GET_SREGS only contains
1184 * information about valid entries. So we flush our internal copy
1185 * to get rid of stale ones, then put all valid SLB entries back
1188 memset(env
->slb
, 0, sizeof(env
->slb
));
1189 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1190 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1191 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1193 * Only restore valid entries
1195 if (rb
& SLB_ESID_V
) {
1196 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1202 for (i
= 0; i
< 16; i
++) {
1203 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1207 for (i
= 0; i
< 8; i
++) {
1208 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1209 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1210 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1211 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1217 int kvm_arch_get_registers(CPUState
*cs
)
1219 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1220 CPUPPCState
*env
= &cpu
->env
;
1221 struct kvm_regs regs
;
1225 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1230 for (i
= 7; i
>= 0; i
--) {
1231 env
->crf
[i
] = cr
& 15;
1235 env
->ctr
= regs
.ctr
;
1237 cpu_write_xer(env
, regs
.xer
);
1238 env
->msr
= regs
.msr
;
1241 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1242 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1244 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1245 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1246 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1247 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1248 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1249 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1250 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1251 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1253 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1255 for (i
= 0;i
< 32; i
++)
1256 env
->gpr
[i
] = regs
.gpr
[i
];
1260 if (cap_booke_sregs
) {
1261 ret
= kvmppc_get_booke_sregs(cpu
);
1268 ret
= kvmppc_get_books_sregs(cpu
);
1275 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1281 /* We deliberately ignore errors here, for kernels which have
1282 * the ONE_REG calls, but don't support the specific
1283 * registers, there's a reasonable chance things will still
1284 * work, at least until we try to migrate. */
1285 for (i
= 0; i
< 1024; i
++) {
1286 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1289 kvm_get_one_spr(cs
, id
, i
);
1295 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1296 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1298 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1299 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1301 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1302 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1303 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1304 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1305 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1306 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1307 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1308 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1309 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1310 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1314 if (kvm_get_vpa(cs
) < 0) {
1315 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1319 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1326 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1328 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1330 if (irq
!= PPC_INTERRUPT_EXT
) {
1334 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1338 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1343 #if defined(TARGET_PPCEMB)
1344 #define PPC_INPUT_INT PPC40x_INPUT_INT
1345 #elif defined(TARGET_PPC64)
1346 #define PPC_INPUT_INT PPC970_INPUT_INT
1348 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1351 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1353 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1354 CPUPPCState
*env
= &cpu
->env
;
1358 qemu_mutex_lock_iothread();
1360 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1361 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1362 if (!cap_interrupt_level
&&
1363 run
->ready_for_interrupt_injection
&&
1364 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1365 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1367 /* For now KVM disregards the 'irq' argument. However, in the
1368 * future KVM could cache it in-kernel to avoid a heavyweight exit
1369 * when reading the UIC.
1371 irq
= KVM_INTERRUPT_SET
;
1373 DPRINTF("injected interrupt %d\n", irq
);
1374 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1376 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1379 /* Always wake up soon in case the interrupt was level based */
1380 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1381 (NANOSECONDS_PER_SECOND
/ 50));
1384 /* We don't know if there are more interrupts pending after this. However,
1385 * the guest will return to userspace in the course of handling this one
1386 * anyways, so we will get a chance to deliver the rest. */
1388 qemu_mutex_unlock_iothread();
1391 MemTxAttrs
kvm_arch_post_run(CPUState
*cs
, struct kvm_run
*run
)
1393 return MEMTXATTRS_UNSPECIFIED
;
1396 int kvm_arch_process_async_events(CPUState
*cs
)
1401 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1403 CPUState
*cs
= CPU(cpu
);
1404 CPUPPCState
*env
= &cpu
->env
;
1406 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1408 cs
->exception_index
= EXCP_HLT
;
1414 /* map dcr access to existing qemu dcr emulation */
1415 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1417 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1418 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1423 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1425 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1426 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1431 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1433 /* Mixed endian case is not handled */
1434 uint32_t sc
= debug_inst_opcode
;
1436 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1438 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1445 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1449 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1450 sc
!= debug_inst_opcode
||
1451 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1459 static int find_hw_breakpoint(target_ulong addr
, int type
)
1463 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1464 <= ARRAY_SIZE(hw_debug_points
));
1466 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1467 if (hw_debug_points
[n
].addr
== addr
&&
1468 hw_debug_points
[n
].type
== type
) {
1476 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1480 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1482 *flag
= BP_MEM_ACCESS
;
1486 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1488 *flag
= BP_MEM_WRITE
;
1492 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1494 *flag
= BP_MEM_READ
;
1501 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1502 target_ulong len
, int type
)
1504 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1508 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1509 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1512 case GDB_BREAKPOINT_HW
:
1513 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1517 if (find_hw_breakpoint(addr
, type
) >= 0) {
1524 case GDB_WATCHPOINT_WRITE
:
1525 case GDB_WATCHPOINT_READ
:
1526 case GDB_WATCHPOINT_ACCESS
:
1527 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1531 if (find_hw_breakpoint(addr
, type
) >= 0) {
1545 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1546 target_ulong len
, int type
)
1550 n
= find_hw_breakpoint(addr
, type
);
1556 case GDB_BREAKPOINT_HW
:
1560 case GDB_WATCHPOINT_WRITE
:
1561 case GDB_WATCHPOINT_READ
:
1562 case GDB_WATCHPOINT_ACCESS
:
1569 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1574 void kvm_arch_remove_all_hw_breakpoints(void)
1576 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1579 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1583 /* Software Breakpoint updates */
1584 if (kvm_sw_breakpoints_active(cs
)) {
1585 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1588 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1589 <= ARRAY_SIZE(hw_debug_points
));
1590 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1592 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1593 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1594 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1595 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1596 switch (hw_debug_points
[n
].type
) {
1597 case GDB_BREAKPOINT_HW
:
1598 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1600 case GDB_WATCHPOINT_WRITE
:
1601 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1603 case GDB_WATCHPOINT_READ
:
1604 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1606 case GDB_WATCHPOINT_ACCESS
:
1607 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1608 KVMPPC_DEBUG_WATCH_READ
;
1611 cpu_abort(cs
, "Unsupported breakpoint type\n");
1613 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1618 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1620 CPUState
*cs
= CPU(cpu
);
1621 CPUPPCState
*env
= &cpu
->env
;
1622 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1627 if (cs
->singlestep_enabled
) {
1629 } else if (arch_info
->status
) {
1630 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1631 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1632 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1636 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1637 KVMPPC_DEBUG_WATCH_WRITE
)) {
1638 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1641 cs
->watchpoint_hit
= &hw_watchpoint
;
1642 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1643 hw_watchpoint
.flags
= flag
;
1647 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1650 /* QEMU is not able to handle debug exception, so inject
1651 * program exception to guest;
1652 * Yes program exception NOT debug exception !!
1653 * When QEMU is using debug resources then debug exception must
1654 * be always set. To achieve this we set MSR_DE and also set
1655 * MSRP_DEP so guest cannot change MSR_DE.
1656 * When emulating debug resource for guest we want guest
1657 * to control MSR_DE (enable/disable debug interrupt on need).
1658 * Supporting both configurations are NOT possible.
1659 * So the result is that we cannot share debug resources
1660 * between QEMU and Guest on BOOKE architecture.
1661 * In the current design QEMU gets the priority over guest,
1662 * this means that if QEMU is using debug resources then guest
1664 * For software breakpoint QEMU uses a privileged instruction;
1665 * So there cannot be any reason that we are here for guest
1666 * set debug exception, only possibility is guest executed a
1667 * privileged / illegal instruction and that's why we are
1668 * injecting a program interrupt.
1671 cpu_synchronize_state(cs
);
1672 /* env->nip is PC, so increment this by 4 to use
1673 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1676 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1677 env
->error_code
= POWERPC_EXCP_INVAL
;
1678 ppc_cpu_do_interrupt(cs
);
1684 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1686 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1687 CPUPPCState
*env
= &cpu
->env
;
1690 qemu_mutex_lock_iothread();
1692 switch (run
->exit_reason
) {
1694 if (run
->dcr
.is_write
) {
1695 DPRINTF("handle dcr write\n");
1696 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1698 DPRINTF("handle dcr read\n");
1699 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1703 DPRINTF("handle halt\n");
1704 ret
= kvmppc_handle_halt(cpu
);
1706 #if defined(TARGET_PPC64)
1707 case KVM_EXIT_PAPR_HCALL
:
1708 DPRINTF("handle PAPR hypercall\n");
1709 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1711 run
->papr_hcall
.args
);
1716 DPRINTF("handle epr\n");
1717 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1720 case KVM_EXIT_WATCHDOG
:
1721 DPRINTF("handle watchdog expiry\n");
1722 watchdog_perform_action();
1726 case KVM_EXIT_DEBUG
:
1727 DPRINTF("handle debug exception\n");
1728 if (kvm_handle_debug(cpu
, run
)) {
1732 /* re-enter, this exception was guest-internal */
1737 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1742 qemu_mutex_unlock_iothread();
1746 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1748 CPUState
*cs
= CPU(cpu
);
1749 uint32_t bits
= tsr_bits
;
1750 struct kvm_one_reg reg
= {
1751 .id
= KVM_REG_PPC_OR_TSR
,
1752 .addr
= (uintptr_t) &bits
,
1755 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1758 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1761 CPUState
*cs
= CPU(cpu
);
1762 uint32_t bits
= tsr_bits
;
1763 struct kvm_one_reg reg
= {
1764 .id
= KVM_REG_PPC_CLEAR_TSR
,
1765 .addr
= (uintptr_t) &bits
,
1768 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1771 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1773 CPUState
*cs
= CPU(cpu
);
1774 CPUPPCState
*env
= &cpu
->env
;
1775 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1777 struct kvm_one_reg reg
= {
1778 .id
= KVM_REG_PPC_TCR
,
1779 .addr
= (uintptr_t) &tcr
,
1782 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1785 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1787 CPUState
*cs
= CPU(cpu
);
1790 if (!kvm_enabled()) {
1794 if (!cap_ppc_watchdog
) {
1795 printf("warning: KVM does not support watchdog");
1799 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1801 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1802 __func__
, strerror(-ret
));
/* Scan /proc/cpuinfo for a line starting with 'field' and copy that whole
 * line into 'value' (at most 'len' bytes, NUL-terminated).
 * Returns 0 when found, -1 otherwise. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
1837 uint32_t kvmppc_get_tbfreq(void)
1841 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1843 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1847 if (!(ns
= strchr(line
, ':'))) {
1856 bool kvmppc_get_host_serial(char **value
)
1858 return g_file_get_contents("/proc/device-tree/system-id", value
, NULL
,
1862 bool kvmppc_get_host_model(char **value
)
1864 return g_file_get_contents("/proc/device-tree/model", value
, NULL
, NULL
);
1867 /* Try to find a device tree node for a CPU with clock-frequency property */
1868 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1870 struct dirent
*dirp
;
1873 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1874 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1879 while ((dirp
= readdir(dp
)) != NULL
) {
1881 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1883 f
= fopen(buf
, "r");
1885 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1892 if (buf
[0] == '\0') {
1893 printf("Unknown host!\n");
/* Read a single big-endian integer property (4 or 8 bytes) from a device
 * tree file. Returns (uint64_t)-1 when the file can't be opened and 0 for
 * unexpected property sizes. */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
1927 /* Read a CPU node property from the host device tree that's a single
1928 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1929 * (can't find or open the property, or doesn't understand the
1931 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1933 char buf
[PATH_MAX
], *tmp
;
1936 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1940 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1941 val
= kvmppc_read_int_dt(tmp
);
/* Host CPU clock frequency from the device tree. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

/* Host VMX/Altivec capability level from the device tree. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

/* Host decimal-floating-point capability from the device tree. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1962 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1964 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1965 CPUState
*cs
= CPU(cpu
);
1967 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1968 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1975 int kvmppc_get_hasidle(CPUPPCState
*env
)
1977 struct kvm_ppc_pvinfo pvinfo
;
1979 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1980 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1987 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1989 uint32_t *hc
= (uint32_t*)buf
;
1990 struct kvm_ppc_pvinfo pvinfo
;
1992 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1993 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1998 * Fallback to always fail hypercalls regardless of endianness:
2000 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2002 * b .+8 (becomes nop in wrong endian)
2003 * bswap32(li r3, -1)
2006 hc
[0] = cpu_to_be32(0x08000048);
2007 hc
[1] = cpu_to_be32(0x3860ffff);
2008 hc
[2] = cpu_to_be32(0x48000008);
2009 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
2014 static inline int kvmppc_enable_hcall(KVMState
*s
, target_ulong hcall
)
2016 return kvm_vm_enable_cap(s
, KVM_CAP_PPC_ENABLE_HCALL
, 0, hcall
, 1);
2019 void kvmppc_enable_logical_ci_hcalls(void)
2022 * FIXME: it would be nice if we could detect the cases where
2023 * we're using a device which requires the in kernel
2024 * implementation of these hcalls, but the kernel lacks them and
2025 * produce a warning.
2027 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
2028 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
2031 void kvmppc_enable_set_mode_hcall(void)
2033 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
2036 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2038 CPUState
*cs
= CPU(cpu
);
2041 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2043 error_report("This vCPU type or KVM version does not support PAPR");
2047 /* Update the capability flag so we sync the right information
2052 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
2054 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
2057 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2059 CPUState
*cs
= CPU(cpu
);
2062 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2063 if (ret
&& mpic_proxy
) {
2064 error_report("This KVM version does not support EPR");
2069 int kvmppc_smt_threads(void)
2071 return cap_ppc_smt
? cap_ppc_smt
: 1;
2075 off_t
kvmppc_alloc_rma(void **rma
)
2079 struct kvm_allocate_rma ret
;
2081 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2082 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2083 * not necessary on this hardware
2084 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2086 * FIXME: We should allow the user to force contiguous RMA
2087 * allocation in the cap_ppc_rma==1 case.
2089 if (cap_ppc_rma
< 2) {
2093 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2095 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2100 size
= MIN(ret
.rma_size
, 256ul << 20);
2102 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2103 if (*rma
== MAP_FAILED
) {
2104 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2111 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2113 struct kvm_ppc_smmu_info info
;
2114 long rampagesize
, best_page_shift
;
2117 if (cap_ppc_rma
>= 2) {
2118 return current_size
;
2121 /* Find the largest hardware supported page size that's less than
2122 * or equal to the (logical) backing page size of guest RAM */
2123 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2124 rampagesize
= getrampagesize();
2125 best_page_shift
= 0;
2127 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2128 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2130 if (!sps
->page_shift
) {
2134 if ((sps
->page_shift
> best_page_shift
)
2135 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2136 best_page_shift
= sps
->page_shift
;
2140 return MIN(current_size
,
2141 1ULL << (best_page_shift
+ hash_shift
- 7));
2145 bool kvmppc_spapr_use_multitce(void)
2147 return cap_spapr_multitce
;
2150 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2153 struct kvm_create_spapr_tce args
= {
2155 .window_size
= window_size
,
2161 /* Must set fd to -1 so we don't try to munmap when called for
2162 * destroying the table, which the upper layers -will- do
2165 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2169 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2171 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2176 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2177 /* FIXME: round this up to page size */
2179 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2180 if (table
== MAP_FAILED
) {
2181 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
/* Tear down a TCE table created by kvmppc_create_spapr_tce(): unmap it and
 * close the fd. A negative fd means there is nothing to do. */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2210 int kvmppc_reset_htab(int shift_hint
)
2212 uint32_t shift
= shift_hint
;
2214 if (!kvm_enabled()) {
2215 /* Full emulation, tell caller to allocate htab itself */
2218 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2220 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2221 if (ret
== -ENOTTY
) {
2222 /* At least some versions of PR KVM advertise the
2223 * capability, but don't implement the ioctl(). Oops.
2224 * Return 0 so that we allocate the htab in qemu, as is
2225 * correct for PR. */
2227 } else if (ret
< 0) {
2233 /* We have a kernel that predates the htab reset calls. For PR
2234 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2235 * this era, it has allocated a 16MB fixed size hash table
2236 * already. Kernels of this era have the GET_PVINFO capability
2237 * only on PR, so we use this hack to determine the right
2239 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
2240 /* PR - tell caller to allocate htab */
2243 /* HV - assume 16MB kernel allocated htab */
2248 static inline uint32_t mfpvr(void)
/* Set or clear the given flag bits in an insns_flags word.
 * NOTE(review): body reconstructed — the extraction dropped it; semantics
 * implied by the call sites in kvmppc_host_cpu_class_init(). Verify
 * against upstream target-ppc/kvm.c. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
2266 static void kvmppc_host_cpu_initfn(Object
*obj
)
2268 assert(kvm_enabled());
2271 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2273 DeviceClass
*dc
= DEVICE_CLASS(oc
);
2274 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2275 uint32_t vmx
= kvmppc_get_vmx();
2276 uint32_t dfp
= kvmppc_get_dfp();
2277 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2278 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2280 /* Now fix up the class with information we can query from the host */
2284 /* Only override when we know what the host supports */
2285 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2286 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2289 /* Only override when we know what the host supports */
2290 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2293 if (dcache_size
!= -1) {
2294 pcc
->l1_dcache_size
= dcache_size
;
2297 if (icache_size
!= -1) {
2298 pcc
->l1_icache_size
= icache_size
;
2301 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2302 dc
->cannot_destroy_with_object_finalize_yet
= true;
2305 bool kvmppc_has_cap_epr(void)
2310 bool kvmppc_has_cap_htab_fd(void)
2315 bool kvmppc_has_cap_fixup_hcalls(void)
2317 return cap_fixup_hcalls
;
2320 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2322 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2324 while (oc
&& !object_class_is_abstract(oc
)) {
2325 oc
= object_class_get_parent(oc
);
2329 return POWERPC_CPU_CLASS(oc
);
2332 static int kvm_ppc_register_host_cpu_type(void)
2334 TypeInfo type_info
= {
2335 .name
= TYPE_HOST_POWERPC_CPU
,
2336 .instance_init
= kvmppc_host_cpu_initfn
,
2337 .class_init
= kvmppc_host_cpu_class_init
,
2339 uint32_t host_pvr
= mfpvr();
2340 PowerPCCPUClass
*pvr_pcc
;
2343 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2344 if (pvr_pcc
== NULL
) {
2345 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2347 if (pvr_pcc
== NULL
) {
2350 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2351 type_register(&type_info
);
2353 /* Register generic family CPU class for a family */
2354 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
2355 dc
= DEVICE_CLASS(pvr_pcc
);
2356 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2357 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
2358 type_register(&type_info
);
2363 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2365 struct kvm_rtas_token_args args
= {
2369 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2373 strncpy(args
.name
, function
, sizeof(args
.name
));
2375 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2378 int kvmppc_get_htab_fd(bool write
)
2380 struct kvm_get_htab_fd s
= {
2381 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2386 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2390 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2393 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2395 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2396 uint8_t buf
[bufsize
];
2400 rc
= read(fd
, buf
, bufsize
);
2402 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2406 uint8_t *buffer
= buf
;
2409 struct kvm_get_htab_header
*head
=
2410 (struct kvm_get_htab_header
*) buffer
;
2411 size_t chunksize
= sizeof(*head
) +
2412 HASH_PTE_SIZE_64
* head
->n_valid
;
2414 qemu_put_be32(f
, head
->index
);
2415 qemu_put_be16(f
, head
->n_valid
);
2416 qemu_put_be16(f
, head
->n_invalid
);
2417 qemu_put_buffer(f
, (void *)(head
+ 1),
2418 HASH_PTE_SIZE_64
* head
->n_valid
);
2420 buffer
+= chunksize
;
2426 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2428 return (rc
== 0) ? 1 : 0;
2431 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2432 uint16_t n_valid
, uint16_t n_invalid
)
2434 struct kvm_get_htab_header
*buf
;
2435 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2438 buf
= alloca(chunksize
);
2440 buf
->n_valid
= n_valid
;
2441 buf
->n_invalid
= n_invalid
;
2443 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2445 rc
= write(fd
, buf
, chunksize
);
2447 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2451 if (rc
!= chunksize
) {
2452 /* We should never get a short write on a single chunk */
2453 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2459 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2464 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
2469 int kvm_arch_on_sigbus(int code
, void *addr
)
2474 void kvm_arch_init_irq_routing(KVMState
*s
)
2478 struct kvm_get_htab_buf
{
2479 struct kvm_get_htab_header header
;
2481 * We require one extra byte for read
2483 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
2486 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2489 struct kvm_get_htab_fd ghf
;
2490 struct kvm_get_htab_buf
*hpte_buf
;
2493 ghf
.start_index
= pte_index
;
2494 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2499 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2501 * Read the hpte group
2503 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
2508 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
2517 void kvmppc_hash64_free_pteg(uint64_t token
)
2519 struct kvm_get_htab_buf
*htab_buf
;
2521 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2527 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2528 target_ulong pte0
, target_ulong pte1
)
2531 struct kvm_get_htab_fd ghf
;
2532 struct kvm_get_htab_buf hpte_buf
;
2535 ghf
.start_index
= 0; /* Ignored */
2536 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2541 hpte_buf
.header
.n_valid
= 1;
2542 hpte_buf
.header
.n_invalid
= 0;
2543 hpte_buf
.header
.index
= pte_index
;
2544 hpte_buf
.hpte
[0] = pte0
;
2545 hpte_buf
.hpte
[1] = pte1
;
2547 * Write the hpte entry.
2548 * CAUTION: write() has the warn_unused_result attribute. Hence we
2549 * need to check the return value, even though we do nothing.
2551 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {
2563 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2564 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
/* The GSI is carried in the low 16 bits of the MSI data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2574 int kvmppc_enable_hwrng(void)
2576 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2580 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);