/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include "qemu/osdep.h"

#include <sys/ioctl.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "sysemu/hostmem.h"
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
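
/*
 * Cached results of the KVM capability probes we care about;
 * kvm_arch_init() below fills these in and the helpers later in this
 * file consult them.
 */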
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_one_reg;
static int cap_ppc_watchdog;
static int cap_htab_fd;
static int cap_fixup_hcalls;

static uint32_t debug_inst_opcode;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");

    kvm_ppc_register_host_cpu_type();
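
/*
 * kvm_arch_sync_sregs(): push the guest PVR into KVM by reading the
 * current sregs, patching in env->spr[SPR_PVR] and writing them back.
 */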
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
            fprintf(stderr, "kvm error: missing PVR setting capability\n");

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));

    env->kvm_sw_tlb = true;
#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;

        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;

        /* 64K on MMU 2.06 and later */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);

    kvm_get_fallback_smmu_info(cpu, info);
static long gethugepagesize(const char *mem_path)
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

        fprintf(stderr, "Couldn't statfs() memory path: %s\n",

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();

    /* It's hugepage, return the huge page size */
static int find_max_supported_pagesize(Object *obj, void *opaque)
    long *hpsize_min = opaque;

    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        mem_path = object_property_get_str(obj, "mem-path", NULL);
            long hpsize = gethugepagesize(mem_path);
            if (hpsize < *hpsize_min) {
                *hpsize_min = hpsize;
            *hpsize_min = getpagesize();
static long getrampagesize(void)
    long hpsize = LONG_MAX;

        return gethugepagesize(mem_path);

    /* it's possible we have memory-backend objects with
     * hugepage-backed RAM. these may get mapped into system
     * address space via -numa parameters or memory hotplug
     * hooks. we want to take these into account, but we
     * also want to make sure these supported hugepage
     * sizes are applicable across the entire range of memory
     * we may boot from, so we take the min across all
     * backends, and assume normal pages in cases where a
     * backend isn't backed by hugepages.
     */
    memdev_root = object_resolve_path("/objects", NULL);
        return getpagesize();

    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);

    return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {

    return (1ul << shift) <= rampgsize;
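
/*
 * kvm_fixup_page_sizes() below converts the kernel-reported SMMU info
 * into env->sps, keeping only those segment/page-size encodings that
 * kvm_valid_page_size() accepts for the current RAM backing page size.
 */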
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,

        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {

            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {

        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {

    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;

#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)

#endif /* !defined (TARGET_PPC64) */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));

/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

static struct HWBreakpoint {
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
int kvm_arch_init_vcpu(CPUState *cs)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);

    if (ret == -EINVAL) {
        error_report("Register sync failed... If you're using kvm-hv.ko,"
                     " only \"-cpu host\" is possible");

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;

    if (!env->kvm_sw_tlb) {

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg = {
        .addr = (uintptr_t) &val,

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        trace_kvm_failed_spr_get(spr, strerror(errno));

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        env->spr[spr] = val.u32;

    case KVM_REG_SIZE_U64:
        env->spr[spr] = val.u64;

        /* Don't handle this size yet */
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg = {
        .addr = (uintptr_t) &val,

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];

        /* Don't handle this size yet */

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        trace_kvm_failed_spr_set(spr, strerror(errno));
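
/*
 * Floating point, VSX and Altivec state is moved through the ONE_REG
 * interface, one register at a time: a struct kvm_one_reg names the
 * register and points at a userspace buffer, roughly
 *
 *     struct kvm_one_reg reg = { .id = KVM_REG_PPC_FPSCR,
 *                                .addr = (uintptr_t)&fpscr };
 *     kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 *
 * kvm_put_fp() pushes QEMU's copy of the state into KVM and kvm_get_fp()
 * reads it back.
 */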
static int kvm_put_fp(CPUState *cs)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));

        for (i = 0; i < 32; i++) {
#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
static int kvm_get_fp(CPUState *cs)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;

    if (env->insns_flags & PPC_FLOAT) {
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));

        for (i = 0; i < 32; i++) {
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                    env->vsr[i] = vsr[1];
                env->fpr[i] = vsr[1];
                    env->vsr[i] = vsr[0];

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
                DPRINTF("Unable to get VR%d from KVM: %s\n",
#if defined(TARGET_PPC64)
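
/*
 * The VPA (virtual processor area), SLB shadow and dispatch trace log
 * registrations are transferred through the KVM_REG_PPC_VPA_* ONE_REG
 * ids; kvm_get_vpa()/kvm_put_vpa() below move all three at once.
 */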
static int kvm_get_vpa(CPUState *cs)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
static int kvm_put_vpa(CPUState *cs)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));

#endif /* TARGET_PPC64 */
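
/*
 * kvm_arch_put_registers(): copy the QEMU-side CPU state (GPRs, CR, XER,
 * SPRGs, segment/SLB state, ONE_REG SPRs, TM state and VPA registrations)
 * into KVM.
 */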
int kvm_arch_put_registers(CPUState *cs, int level)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);

    regs.xer = cpu_read_xer(env);

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++)
        regs.gpr[i] = env->gpr[i];

    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);

    if (env->tlb_dirty) {
        env->tlb_dirty = false;

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            if (env->slb[i].esid & SLB_ESID_V) {
                sregs.u.s.ppc64.slb[i].slbe |= i;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;

        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];

        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

                kvm_put_one_spr(cs, id, i);

            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);

        if (kvm_put_vpa(cs) < 0) {
            DPRINTF("Warning: Unable to set VPA information to KVM\n");

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
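
/*
 * kvm_arch_get_registers(): the inverse of kvm_arch_put_registers() --
 * pull the register file, BookE or book3s sregs, ONE_REG SPRs, TM state
 * and VPA information out of KVM into the QEMU CPU state.
 */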
int kvm_arch_get_registers(CPUState *cs)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);

    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;

    env->ctr = regs.ctr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++)
        env->gpr[i] = regs.gpr[i];
    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
            kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
                kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
                kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
                kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);

        if (!env->external_htab) {
            ppc_store_sdr1(env, sregs.u.s.sdr1);

        /*
         * The packed SLB array we get from KVM_GET_SREGS only contains
         * information about valid entries. So we flush our internal
         * copy to get rid of stale ones, then put all valid SLB entries
         * back in.
         */
        memset(env->slb, 0, sizeof(env->slb));
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
            target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
            /*
             * Only restore valid entries
             */
            if (rb & SLB_ESID_V) {
                ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);

        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];

        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;

        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

                kvm_get_one_spr(cs, id, i);

            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);

        if (kvm_get_vpa(cs) < 0) {
            DPRINTF("Warning: Unable to get VPA information from KVM\n");

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))

        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (get_ticks_per_sec() / 50));

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
    return MEMTXATTRS_UNSPECIFIED;

int kvm_arch_process_async_events(CPUState *cs)

static int kvmppc_handle_halt(PowerPCCPU *cpu)
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->exception_index = EXCP_HLT;
/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
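
/*
 * Hardware breakpoints and watchpoints handed to us by gdb are kept in
 * the hw_debug_points[] table; the helpers below look entries up by
 * address and type.
 */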
static int find_hw_breakpoint(target_ulong addr, int type)
    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));

    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
        if (hw_debug_points[n].addr == addr &&
            hw_debug_points[n].type == type) {

static int find_hw_watchpoint(target_ulong addr, int *flag)
    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
        *flag = BP_MEM_ACCESS;

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
        *flag = BP_MEM_WRITE;

    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
        *flag = BP_MEM_READ;
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {

        if (find_hw_breakpoint(addr, type) >= 0) {

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {

        if (find_hw_breakpoint(addr, type) >= 0) {
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
    n = find_hw_breakpoint(addr, type);

    case GDB_BREAKPOINT_HW:

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:

    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

void kvm_arch_remove_all_hw_breakpoints(void)
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                cpu_abort(cs, "Unsupported breakpoint type\n");
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;

    if (cs->singlestep_enabled) {
    } else if (arch_info->status) {
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        /* QEMU is not able to handle debug exception, so inject
         * program exception to guest;
         * Yes program exception NOT debug exception !!
         * When QEMU is using debug resources then debug exception must
         * be always set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so guest cannot change MSR_DE.
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations is NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, only possibility is guest executed a
         * privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */
        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
         */
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);

        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.args);

        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);

    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
        /* re-enter, this exception was guest-internal */

        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);

    qemu_mutex_unlock_iothread();
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);

int kvmppc_set_tcr(PowerPCCPU *cpu)
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
static int read_cpuinfo(const char *field, char *value, int len)
    int field_len = strlen(field);

    f = fopen("/proc/cpuinfo", "r");

        if (!fgets(line, sizeof(line), f)) {
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
uint32_t kvmppc_get_tbfreq(void)
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {

    if (!(ns = strchr(line, ':'))) {
bool kvmppc_get_host_serial(char **value)
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,

bool kvmppc_get_host_model(char **value)
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
    struct dirent *dirp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");

    while ((dirp = readdir(dp)) != NULL) {
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
        f = fopen(buf, "r");
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);

    if (buf[0] == '\0') {
        printf("Unknown host!\n");
static uint64_t kvmppc_read_int_dt(const char *filename)
    f = fopen(filename, "rb");

    len = fread(&u, 1, sizeof(u), f);
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
        return be64_to_cpu(u.v64);
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
    char buf[PATH_MAX], *tmp;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {

    tmp = g_strdup_printf("%s/%s", buf, propname);
    val = kvmppc_read_int_dt(tmp);
uint64_t kvmppc_get_clockfreq(void)
    return kvmppc_read_int_cpu_dt("clock-frequency");

uint32_t kvmppc_get_vmx(void)
    return kvmppc_read_int_cpu_dt("ibm,vmx");

uint32_t kvmppc_get_dfp(void)
    return kvmppc_read_int_cpu_dt("ibm,dfp");
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {

int kvmppc_get_hasidle(CPUPPCState *env)
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */
    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);

void kvmppc_enable_logical_ci_hcalls(void)
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);

void kvmppc_enable_set_mode_hcall(void)
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
void kvmppc_set_papr(PowerPCCPU *cpu)
    CPUState *cs = CPU(cpu);

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
        error_report("This vCPU type or KVM version does not support PAPR");

    /* Update the capability flag so we sync the right information
     * with kvm */

int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
    CPUState *cs = CPU(cpu);

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
    if (ret && mpic_proxy) {
        error_report("This KVM version does not support EPR");

int kvmppc_smt_threads(void)
    return cap_ppc_smt ? cap_ppc_smt : 1;
off_t kvmppc_alloc_rma(void **rma)
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;

    if (cap_ppc_rma >= 2) {
        return current_size;

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
bool kvmppc_spapr_use_multitce(void)
    return cap_spapr_multitce;

void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
    struct kvm_create_spapr_tce args = {
        .window_size = window_size,

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
        /* Leak the table */
int kvmppc_reset_htab(int shift_hint)
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */

    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
        } else if (ret < 0) {

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        /* HV - assume 16MB kernel allocated htab */
static inline uint32_t mfpvr(void)

static void alter_insns(uint64_t *word, uint64_t flags, bool on)

static void kvmppc_host_cpu_initfn(Object *obj)
    assert(kvm_enabled());

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
    DeviceClass *dc = DEVICE_CLASS(oc);
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */

        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);

        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;

    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
    dc->cannot_destroy_with_object_finalize_yet = true;
bool kvmppc_has_cap_epr(void)

bool kvmppc_has_cap_htab_fd(void)

bool kvmppc_has_cap_fixup_hcalls(void)
    return cap_fixup_hcalls;
static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);

    return POWERPC_CPU_CLASS(oc);

static int kvm_ppc_register_host_cpu_type(void)
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,

    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    if (pvr_pcc == NULL) {

    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

    /* Register generic family CPU class for a family */
    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
    dc = DEVICE_CLASS(pvr_pcc);
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
    type_register(&type_info);
int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
    struct kvm_rtas_token_args args = {

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
int kvmppc_get_htab_fd(bool write)
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,

        fprintf(stderr, "KVM version doesn't support saving the hash table\n");

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
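
/*
 * kvmppc_save_htab(): stream hash page table chunks from the kernel's
 * HTAB fd into the migration stream, stopping once max_ns nanoseconds
 * have elapsed; kvmppc_load_htab_chunk() writes one such chunk back on
 * the destination.
 */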
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];

        rc = read(fd, buf, bufsize);
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",

            uint8_t *buffer = buf;

                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
             || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;

    buf = alloca(chunksize);

    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);

    rc = write(fd, buf, chunksize);
        fprintf(stderr, "Error writing KVM hash table: %s\n",

    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)

int kvm_arch_on_sigbus(int code, void *addr)

void kvm_arch_init_irq_routing(KVMState *s)
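
/*
 * Helpers for reading and writing individual hash PTE groups through the
 * KVM_PPC_GET_HTAB_FD interface, used by the hash-64 MMU code below.
 */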
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];

uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {

    return (uint64_t)(uintptr_t) hpte_buf->hpte;

void kvmppc_hash64_free_pteg(uint64_t token)
    struct kvm_get_htab_buf *htab_buf;

    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,

void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
                             target_ulong pte0, target_ulong pte1)
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf hpte_buf;

    ghf.start_index = 0;     /* Ignored */
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);

    hpte_buf.header.n_valid = 1;
    hpte_buf.header.n_invalid = 0;
    hpte_buf.header.index = pte_index;
    hpte_buf.hpte[0] = pte0;
    hpte_buf.hpte[1] = pte1;
    /*
     * Write the hpte entry.
     * CAUTION: write() has the warn_unused_result attribute. Hence we
     * need to check the return value, even though we do nothing.
     */
    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)

int kvm_arch_msi_data_to_gsi(uint32_t data)
    return data & 0xffff;

int kvmppc_enable_hwrng(void)
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);