/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include <sys/ioctl.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "cpu-models.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/hw_accel.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/spapr_cpu_core.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "exec/ram_addr.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#include "qemu/mmap-alloc.h"
#if defined(TARGET_PPC64)
#include "hw/ppc/spapr_cpu_core.h"
#endif
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */

static uint32_t debug_inst_opcode;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

/* Check whether we are running with KVM-PR (instead of KVM-HV). This
 * should only be used for fallback tests - generally we should use
 * explicit capabilities for the features we want, rather than
 * assuming what is/isn't available depending on the KVM variant. */
static bool kvmppc_is_pr(KVMState *ks)
{
    /* Assume KVM-PR if the GET_PVINFO capability is available */
    return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
}

static int kvm_ppc_register_host_cpu_type(void);
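/*
 * Illustrative note (not in the original source): kvmppc_is_pr() is meant as
 * a last-resort probe, so callers typically gate only fallback paths on it,
 * roughly:
 *
 *     if (kvmppc_is_pr(kvm_state)) {
 *         ... assume PR-KVM limits (4K/16M pages, QEMU-allocated HPT) ...
 *     }
 *
 * Concrete examples are kvm_get_fallback_smmu_info() and kvmppc_reset_htab()
 * later in this file.
 */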
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
    cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
{
    return 0;
}
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - Check whether we are running "PR" KVM which only supports 4K
     *   and 16M pages, but supports them regardless of the backing
     *   store characteristics. We also don't support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvmppc_is_pr(cs->kvm_state)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
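/*
 * Worked example (added for illustration): with KVM_PPC_PAGE_SIZES_REAL set
 * and guest RAM backed by 16M hugepages (rampgsize == 1 << 24), a 64K page
 * size (shift == 16) passes because (1ul << 16) <= (1 << 24), whereas a 16G
 * page (shift == 34) is rejected. Without the flag every size is accepted.
 */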
static long max_cpu_page_size;
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    int ik, iq, jk, jq;
    bool has_64k_pages = false;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    if (!max_cpu_page_size) {
        max_cpu_page_size = qemu_getrampagesize();
    }

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            if (ksps->enc[jk].page_shift == 16) {
                has_64k_pages = true;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
    if (!has_64k_pages) {
        env->mmu_model &= ~POWERPC_MMU_64K;
    }
}
bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
{
    Object *mem_obj = object_resolve_path(obj_path, NULL);
    char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
    long pagesize;

    if (mempath) {
        pagesize = qemu_mempath_getpagesize(mempath);
        g_free(mempath);
    } else {
        pagesize = getpagesize();
    }

    return pagesize >= max_cpu_page_size;
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
{
    return true;
}

#endif /* !defined (TARGET_PPC64) */
unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
}
452 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
453 * book3s supports only 1 watchpoint, so array size
454 * of 4 is sufficient for now.
456 #define MAX_HW_BKPTS 4
458 static struct HWBreakpoint
{
461 } hw_debug_points
[MAX_HW_BKPTS
];
463 static CPUWatchpoint hw_watchpoint
;
465 /* Default there is no breakpoint and watchpoint supported */
466 static int max_hw_breakpoint
;
467 static int max_hw_watchpoint
;
468 static int nb_hw_breakpoint
;
469 static int nb_hw_watchpoint
;
471 static void kvmppc_hw_debug_points_init(CPUPPCState
*cenv
)
473 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
474 max_hw_breakpoint
= 2;
475 max_hw_watchpoint
= 2;
478 if ((max_hw_breakpoint
+ max_hw_watchpoint
) > MAX_HW_BKPTS
) {
479 fprintf(stderr
, "Error initializing h/w breakpoints\n");
484 int kvm_arch_init_vcpu(CPUState
*cs
)
486 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
487 CPUPPCState
*cenv
= &cpu
->env
;
490 /* Gather server mmu info from KVM and update the CPU state */
491 kvm_fixup_page_sizes(cpu
);
493 /* Synchronize sregs with kvm */
494 ret
= kvm_arch_sync_sregs(cpu
);
496 if (ret
== -EINVAL
) {
497 error_report("Register sync failed... If you're using kvm-hv.ko,"
498 " only \"-cpu host\" is possible");
503 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
505 switch (cenv
->mmu_model
) {
506 case POWERPC_MMU_BOOKE206
:
507 /* This target supports access to KVM's guest TLB */
508 ret
= kvm_booke206_tlb_init(cpu
);
510 case POWERPC_MMU_2_07
:
511 if (!cap_htm
&& !kvmppc_is_pr(cs
->kvm_state
)) {
512 /* KVM-HV has transactional memory on POWER8 also without the
513 * KVM_CAP_PPC_HTM extension, so enable it here instead as
514 * long as it's availble to userspace on the host. */
515 if (qemu_getauxval(AT_HWCAP2
) & PPC_FEATURE2_HAS_HTM
) {
524 kvm_get_one_reg(cs
, KVM_REG_PPC_DEBUG_INST
, &debug_inst_opcode
);
525 kvmppc_hw_debug_points_init(cenv
);
530 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
532 CPUPPCState
*env
= &cpu
->env
;
533 CPUState
*cs
= CPU(cpu
);
534 struct kvm_dirty_tlb dirty_tlb
;
535 unsigned char *bitmap
;
538 if (!env
->kvm_sw_tlb
) {
542 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
543 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
545 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
546 dirty_tlb
.num_dirty
= env
->nb_tlb
;
548 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
550 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
551 __func__
, strerror(-ret
));
557 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
559 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
560 CPUPPCState
*env
= &cpu
->env
;
565 struct kvm_one_reg reg
= {
567 .addr
= (uintptr_t) &val
,
571 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
573 trace_kvm_failed_spr_get(spr
, strerror(errno
));
575 switch (id
& KVM_REG_SIZE_MASK
) {
576 case KVM_REG_SIZE_U32
:
577 env
->spr
[spr
] = val
.u32
;
580 case KVM_REG_SIZE_U64
:
581 env
->spr
[spr
] = val
.u64
;
585 /* Don't handle this size yet */
591 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
593 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
594 CPUPPCState
*env
= &cpu
->env
;
599 struct kvm_one_reg reg
= {
601 .addr
= (uintptr_t) &val
,
605 switch (id
& KVM_REG_SIZE_MASK
) {
606 case KVM_REG_SIZE_U32
:
607 val
.u32
= env
->spr
[spr
];
610 case KVM_REG_SIZE_U64
:
611 val
.u64
= env
->spr
[spr
];
615 /* Don't handle this size yet */
619 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
621 trace_kvm_failed_spr_set(spr
, strerror(errno
));
625 static int kvm_put_fp(CPUState
*cs
)
627 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
628 CPUPPCState
*env
= &cpu
->env
;
629 struct kvm_one_reg reg
;
633 if (env
->insns_flags
& PPC_FLOAT
) {
634 uint64_t fpscr
= env
->fpscr
;
635 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
637 reg
.id
= KVM_REG_PPC_FPSCR
;
638 reg
.addr
= (uintptr_t)&fpscr
;
639 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
641 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
645 for (i
= 0; i
< 32; i
++) {
648 #ifdef HOST_WORDS_BIGENDIAN
649 vsr
[0] = float64_val(env
->fpr
[i
]);
650 vsr
[1] = env
->vsr
[i
];
652 vsr
[0] = env
->vsr
[i
];
653 vsr
[1] = float64_val(env
->fpr
[i
]);
655 reg
.addr
= (uintptr_t) &vsr
;
656 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
658 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
660 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
667 if (env
->insns_flags
& PPC_ALTIVEC
) {
668 reg
.id
= KVM_REG_PPC_VSCR
;
669 reg
.addr
= (uintptr_t)&env
->vscr
;
670 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
672 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
676 for (i
= 0; i
< 32; i
++) {
677 reg
.id
= KVM_REG_PPC_VR(i
);
678 reg
.addr
= (uintptr_t)&env
->avr
[i
];
679 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
681 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
690 static int kvm_get_fp(CPUState
*cs
)
692 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
693 CPUPPCState
*env
= &cpu
->env
;
694 struct kvm_one_reg reg
;
698 if (env
->insns_flags
& PPC_FLOAT
) {
700 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
702 reg
.id
= KVM_REG_PPC_FPSCR
;
703 reg
.addr
= (uintptr_t)&fpscr
;
704 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
706 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
712 for (i
= 0; i
< 32; i
++) {
715 reg
.addr
= (uintptr_t) &vsr
;
716 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
718 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
720 DPRINTF("Unable to get %s%d from KVM: %s\n",
721 vsx
? "VSR" : "FPR", i
, strerror(errno
));
724 #ifdef HOST_WORDS_BIGENDIAN
725 env
->fpr
[i
] = vsr
[0];
727 env
->vsr
[i
] = vsr
[1];
730 env
->fpr
[i
] = vsr
[1];
732 env
->vsr
[i
] = vsr
[0];
739 if (env
->insns_flags
& PPC_ALTIVEC
) {
740 reg
.id
= KVM_REG_PPC_VSCR
;
741 reg
.addr
= (uintptr_t)&env
->vscr
;
742 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
744 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
748 for (i
= 0; i
< 32; i
++) {
749 reg
.id
= KVM_REG_PPC_VR(i
);
750 reg
.addr
= (uintptr_t)&env
->avr
[i
];
751 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
753 DPRINTF("Unable to get VR%d from KVM: %s\n",
763 #if defined(TARGET_PPC64)
764 static int kvm_get_vpa(CPUState
*cs
)
766 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
767 CPUPPCState
*env
= &cpu
->env
;
768 struct kvm_one_reg reg
;
771 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
772 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
773 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
775 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
779 assert((uintptr_t)&env
->slb_shadow_size
780 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
781 reg
.id
= KVM_REG_PPC_VPA_SLB
;
782 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
783 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
785 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
790 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
791 reg
.id
= KVM_REG_PPC_VPA_DTL
;
792 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
793 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
795 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
803 static int kvm_put_vpa(CPUState
*cs
)
805 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
806 CPUPPCState
*env
= &cpu
->env
;
807 struct kvm_one_reg reg
;
810 /* SLB shadow or DTL can't be registered unless a master VPA is
811 * registered. That means when restoring state, if a VPA *is*
812 * registered, we need to set that up first. If not, we need to
813 * deregister the others before deregistering the master VPA */
814 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
817 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
818 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
819 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
821 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
826 assert((uintptr_t)&env
->slb_shadow_size
827 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
828 reg
.id
= KVM_REG_PPC_VPA_SLB
;
829 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
830 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
832 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
836 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
837 reg
.id
= KVM_REG_PPC_VPA_DTL
;
838 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
839 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
841 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
846 if (!env
->vpa_addr
) {
847 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
848 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
849 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
851 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
858 #endif /* TARGET_PPC64 */
860 int kvmppc_put_books_sregs(PowerPCCPU
*cpu
)
862 CPUPPCState
*env
= &cpu
->env
;
863 struct kvm_sregs sregs
;
866 sregs
.pvr
= env
->spr
[SPR_PVR
];
868 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
872 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
873 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
874 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
875 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
877 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
882 for (i
= 0; i
< 16; i
++) {
883 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
887 for (i
= 0; i
< 8; i
++) {
888 /* Beware. We have to swap upper and lower bits here */
889 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
891 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
895 return kvm_vcpu_ioctl(CPU(cpu
), KVM_SET_SREGS
, &sregs
);
898 int kvm_arch_put_registers(CPUState
*cs
, int level
)
900 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
901 CPUPPCState
*env
= &cpu
->env
;
902 struct kvm_regs regs
;
906 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
913 regs
.xer
= cpu_read_xer(env
);
917 regs
.srr0
= env
->spr
[SPR_SRR0
];
918 regs
.srr1
= env
->spr
[SPR_SRR1
];
920 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
921 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
922 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
923 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
924 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
925 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
926 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
927 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
929 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
931 for (i
= 0;i
< 32; i
++)
932 regs
.gpr
[i
] = env
->gpr
[i
];
935 for (i
= 0; i
< 8; i
++) {
936 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
939 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
945 if (env
->tlb_dirty
) {
947 env
->tlb_dirty
= false;
950 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
951 ret
= kvmppc_put_books_sregs(cpu
);
957 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
958 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
964 /* We deliberately ignore errors here, for kernels which have
965 * the ONE_REG calls, but don't support the specific
966 * registers, there's a reasonable chance things will still
967 * work, at least until we try to migrate. */
968 for (i
= 0; i
< 1024; i
++) {
969 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
972 kvm_put_one_spr(cs
, id
, i
);
978 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
979 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
981 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
982 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
984 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
985 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
986 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
987 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
988 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
989 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
990 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
991 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
992 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
993 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
997 if (kvm_put_vpa(cs
) < 0) {
998 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1002 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1003 #endif /* TARGET_PPC64 */
1009 static void kvm_sync_excp(CPUPPCState
*env
, int vector
, int ivor
)
1011 env
->excp_vectors
[vector
] = env
->spr
[ivor
] + env
->spr
[SPR_BOOKE_IVPR
];
1014 static int kvmppc_get_booke_sregs(PowerPCCPU
*cpu
)
1016 CPUPPCState
*env
= &cpu
->env
;
1017 struct kvm_sregs sregs
;
1020 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1025 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
1026 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
1027 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
1028 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
1029 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
1030 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
1031 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
1032 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
1033 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
1034 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
1035 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
1036 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
1039 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
1040 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
1041 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
1042 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
1043 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
1044 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
1047 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
1048 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
1051 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
1052 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
1055 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
1056 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
1057 kvm_sync_excp(env
, POWERPC_EXCP_CRITICAL
, SPR_BOOKE_IVOR0
);
1058 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
1059 kvm_sync_excp(env
, POWERPC_EXCP_MCHECK
, SPR_BOOKE_IVOR1
);
1060 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
1061 kvm_sync_excp(env
, POWERPC_EXCP_DSI
, SPR_BOOKE_IVOR2
);
1062 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
1063 kvm_sync_excp(env
, POWERPC_EXCP_ISI
, SPR_BOOKE_IVOR3
);
1064 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
1065 kvm_sync_excp(env
, POWERPC_EXCP_EXTERNAL
, SPR_BOOKE_IVOR4
);
1066 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
1067 kvm_sync_excp(env
, POWERPC_EXCP_ALIGN
, SPR_BOOKE_IVOR5
);
1068 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
1069 kvm_sync_excp(env
, POWERPC_EXCP_PROGRAM
, SPR_BOOKE_IVOR6
);
1070 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
1071 kvm_sync_excp(env
, POWERPC_EXCP_FPU
, SPR_BOOKE_IVOR7
);
1072 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
1073 kvm_sync_excp(env
, POWERPC_EXCP_SYSCALL
, SPR_BOOKE_IVOR8
);
1074 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
1075 kvm_sync_excp(env
, POWERPC_EXCP_APU
, SPR_BOOKE_IVOR9
);
1076 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
1077 kvm_sync_excp(env
, POWERPC_EXCP_DECR
, SPR_BOOKE_IVOR10
);
1078 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
1079 kvm_sync_excp(env
, POWERPC_EXCP_FIT
, SPR_BOOKE_IVOR11
);
1080 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
1081 kvm_sync_excp(env
, POWERPC_EXCP_WDT
, SPR_BOOKE_IVOR12
);
1082 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
1083 kvm_sync_excp(env
, POWERPC_EXCP_DTLB
, SPR_BOOKE_IVOR13
);
1084 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
1085 kvm_sync_excp(env
, POWERPC_EXCP_ITLB
, SPR_BOOKE_IVOR14
);
1086 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1087 kvm_sync_excp(env
, POWERPC_EXCP_DEBUG
, SPR_BOOKE_IVOR15
);
1089 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1090 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1091 kvm_sync_excp(env
, POWERPC_EXCP_SPEU
, SPR_BOOKE_IVOR32
);
1092 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1093 kvm_sync_excp(env
, POWERPC_EXCP_EFPDI
, SPR_BOOKE_IVOR33
);
1094 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1095 kvm_sync_excp(env
, POWERPC_EXCP_EFPRI
, SPR_BOOKE_IVOR34
);
1098 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1099 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1100 kvm_sync_excp(env
, POWERPC_EXCP_EPERFM
, SPR_BOOKE_IVOR35
);
1103 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1104 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1105 kvm_sync_excp(env
, POWERPC_EXCP_DOORI
, SPR_BOOKE_IVOR36
);
1106 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1107 kvm_sync_excp(env
, POWERPC_EXCP_DOORCI
, SPR_BOOKE_IVOR37
);
1111 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1112 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1113 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1114 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1115 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1116 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1117 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1118 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1119 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1120 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1121 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1124 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1125 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1128 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1129 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1130 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1133 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1134 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1135 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1136 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1138 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1139 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1140 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1147 static int kvmppc_get_books_sregs(PowerPCCPU
*cpu
)
1149 CPUPPCState
*env
= &cpu
->env
;
1150 struct kvm_sregs sregs
;
1154 ret
= kvm_vcpu_ioctl(CPU(cpu
), KVM_GET_SREGS
, &sregs
);
1160 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1166 * The packed SLB array we get from KVM_GET_SREGS only contains
1167 * information about valid entries. So we flush our internal copy
1168 * to get rid of stale ones, then put all valid SLB entries back
1171 memset(env
->slb
, 0, sizeof(env
->slb
));
1172 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1173 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1174 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1176 * Only restore valid entries
1178 if (rb
& SLB_ESID_V
) {
1179 ppc_store_slb(cpu
, rb
& 0xfff, rb
& ~0xfffULL
, rs
);
1185 for (i
= 0; i
< 16; i
++) {
1186 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1190 for (i
= 0; i
< 8; i
++) {
1191 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1192 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1193 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1194 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1200 int kvm_arch_get_registers(CPUState
*cs
)
1202 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1203 CPUPPCState
*env
= &cpu
->env
;
1204 struct kvm_regs regs
;
1208 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
1213 for (i
= 7; i
>= 0; i
--) {
1214 env
->crf
[i
] = cr
& 15;
1218 env
->ctr
= regs
.ctr
;
1220 cpu_write_xer(env
, regs
.xer
);
1221 env
->msr
= regs
.msr
;
1224 env
->spr
[SPR_SRR0
] = regs
.srr0
;
1225 env
->spr
[SPR_SRR1
] = regs
.srr1
;
1227 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
1228 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
1229 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
1230 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
1231 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
1232 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
1233 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
1234 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
1236 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
1238 for (i
= 0;i
< 32; i
++)
1239 env
->gpr
[i
] = regs
.gpr
[i
];
1243 if (cap_booke_sregs
) {
1244 ret
= kvmppc_get_booke_sregs(cpu
);
1251 ret
= kvmppc_get_books_sregs(cpu
);
1258 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1264 /* We deliberately ignore errors here, for kernels which have
1265 * the ONE_REG calls, but don't support the specific
1266 * registers, there's a reasonable chance things will still
1267 * work, at least until we try to migrate. */
1268 for (i
= 0; i
< 1024; i
++) {
1269 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1272 kvm_get_one_spr(cs
, id
, i
);
1278 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1279 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1281 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1282 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1284 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1285 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1286 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1287 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1288 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1289 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1290 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1291 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1292 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1293 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1297 if (kvm_get_vpa(cs
) < 0) {
1298 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1302 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1334 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1336 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1337 CPUPPCState
*env
= &cpu
->env
;
1341 qemu_mutex_lock_iothread();
1343 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1344 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1345 if (!cap_interrupt_level
&&
1346 run
->ready_for_interrupt_injection
&&
1347 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1348 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1350 /* For now KVM disregards the 'irq' argument. However, in the
1351 * future KVM could cache it in-kernel to avoid a heavyweight exit
1352 * when reading the UIC.
1354 irq
= KVM_INTERRUPT_SET
;
1356 DPRINTF("injected interrupt %d\n", irq
);
1357 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1359 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1362 /* Always wake up soon in case the interrupt was level based */
1363 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1364 (NANOSECONDS_PER_SECOND
/ 50));
1367 /* We don't know if there are more interrupts pending after this. However,
1368 * the guest will return to userspace in the course of handling this one
1369 * anyways, so we will get a chance to deliver the rest. */
1371 qemu_mutex_unlock_iothread();
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}

static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);

    return 0;
}
1414 int kvm_arch_insert_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1416 /* Mixed endian case is not handled */
1417 uint32_t sc
= debug_inst_opcode
;
1419 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1421 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 1)) {
1428 int kvm_arch_remove_sw_breakpoint(CPUState
*cs
, struct kvm_sw_breakpoint
*bp
)
1432 if (cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&sc
, sizeof(sc
), 0) ||
1433 sc
!= debug_inst_opcode
||
1434 cpu_memory_rw_debug(cs
, bp
->pc
, (uint8_t *)&bp
->saved_insn
,
1442 static int find_hw_breakpoint(target_ulong addr
, int type
)
1446 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1447 <= ARRAY_SIZE(hw_debug_points
));
1449 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1450 if (hw_debug_points
[n
].addr
== addr
&&
1451 hw_debug_points
[n
].type
== type
) {
1459 static int find_hw_watchpoint(target_ulong addr
, int *flag
)
1463 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_ACCESS
);
1465 *flag
= BP_MEM_ACCESS
;
1469 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_WRITE
);
1471 *flag
= BP_MEM_WRITE
;
1475 n
= find_hw_breakpoint(addr
, GDB_WATCHPOINT_READ
);
1477 *flag
= BP_MEM_READ
;
1484 int kvm_arch_insert_hw_breakpoint(target_ulong addr
,
1485 target_ulong len
, int type
)
1487 if ((nb_hw_breakpoint
+ nb_hw_watchpoint
) >= ARRAY_SIZE(hw_debug_points
)) {
1491 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].addr
= addr
;
1492 hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
].type
= type
;
1495 case GDB_BREAKPOINT_HW
:
1496 if (nb_hw_breakpoint
>= max_hw_breakpoint
) {
1500 if (find_hw_breakpoint(addr
, type
) >= 0) {
1507 case GDB_WATCHPOINT_WRITE
:
1508 case GDB_WATCHPOINT_READ
:
1509 case GDB_WATCHPOINT_ACCESS
:
1510 if (nb_hw_watchpoint
>= max_hw_watchpoint
) {
1514 if (find_hw_breakpoint(addr
, type
) >= 0) {
1528 int kvm_arch_remove_hw_breakpoint(target_ulong addr
,
1529 target_ulong len
, int type
)
1533 n
= find_hw_breakpoint(addr
, type
);
1539 case GDB_BREAKPOINT_HW
:
1543 case GDB_WATCHPOINT_WRITE
:
1544 case GDB_WATCHPOINT_READ
:
1545 case GDB_WATCHPOINT_ACCESS
:
1552 hw_debug_points
[n
] = hw_debug_points
[nb_hw_breakpoint
+ nb_hw_watchpoint
];
1557 void kvm_arch_remove_all_hw_breakpoints(void)
1559 nb_hw_breakpoint
= nb_hw_watchpoint
= 0;
1562 void kvm_arch_update_guest_debug(CPUState
*cs
, struct kvm_guest_debug
*dbg
)
1566 /* Software Breakpoint updates */
1567 if (kvm_sw_breakpoints_active(cs
)) {
1568 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_SW_BP
;
1571 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
)
1572 <= ARRAY_SIZE(hw_debug_points
));
1573 assert((nb_hw_breakpoint
+ nb_hw_watchpoint
) <= ARRAY_SIZE(dbg
->arch
.bp
));
1575 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1576 dbg
->control
|= KVM_GUESTDBG_ENABLE
| KVM_GUESTDBG_USE_HW_BP
;
1577 memset(dbg
->arch
.bp
, 0, sizeof(dbg
->arch
.bp
));
1578 for (n
= 0; n
< nb_hw_breakpoint
+ nb_hw_watchpoint
; n
++) {
1579 switch (hw_debug_points
[n
].type
) {
1580 case GDB_BREAKPOINT_HW
:
1581 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_BREAKPOINT
;
1583 case GDB_WATCHPOINT_WRITE
:
1584 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
;
1586 case GDB_WATCHPOINT_READ
:
1587 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_READ
;
1589 case GDB_WATCHPOINT_ACCESS
:
1590 dbg
->arch
.bp
[n
].type
= KVMPPC_DEBUG_WATCH_WRITE
|
1591 KVMPPC_DEBUG_WATCH_READ
;
1594 cpu_abort(cs
, "Unsupported breakpoint type\n");
1596 dbg
->arch
.bp
[n
].addr
= hw_debug_points
[n
].addr
;
1601 static int kvm_handle_debug(PowerPCCPU
*cpu
, struct kvm_run
*run
)
1603 CPUState
*cs
= CPU(cpu
);
1604 CPUPPCState
*env
= &cpu
->env
;
1605 struct kvm_debug_exit_arch
*arch_info
= &run
->debug
.arch
;
1610 if (cs
->singlestep_enabled
) {
1612 } else if (arch_info
->status
) {
1613 if (nb_hw_breakpoint
+ nb_hw_watchpoint
> 0) {
1614 if (arch_info
->status
& KVMPPC_DEBUG_BREAKPOINT
) {
1615 n
= find_hw_breakpoint(arch_info
->address
, GDB_BREAKPOINT_HW
);
1619 } else if (arch_info
->status
& (KVMPPC_DEBUG_WATCH_READ
|
1620 KVMPPC_DEBUG_WATCH_WRITE
)) {
1621 n
= find_hw_watchpoint(arch_info
->address
, &flag
);
1624 cs
->watchpoint_hit
= &hw_watchpoint
;
1625 hw_watchpoint
.vaddr
= hw_debug_points
[n
].addr
;
1626 hw_watchpoint
.flags
= flag
;
1630 } else if (kvm_find_sw_breakpoint(cs
, arch_info
->address
)) {
1633 /* QEMU is not able to handle debug exception, so inject
1634 * program exception to guest;
1635 * Yes program exception NOT debug exception !!
1636 * When QEMU is using debug resources then debug exception must
1637 * be always set. To achieve this we set MSR_DE and also set
1638 * MSRP_DEP so guest cannot change MSR_DE.
1639 * When emulating debug resource for guest we want guest
1640 * to control MSR_DE (enable/disable debug interrupt on need).
1641 * Supporting both configurations are NOT possible.
1642 * So the result is that we cannot share debug resources
1643 * between QEMU and Guest on BOOKE architecture.
1644 * In the current design QEMU gets the priority over guest,
1645 * this means that if QEMU is using debug resources then guest
1647 * For software breakpoint QEMU uses a privileged instruction;
1648 * So there cannot be any reason that we are here for guest
1649 * set debug exception, only possibility is guest executed a
1650 * privileged / illegal instruction and that's why we are
1651 * injecting a program interrupt.
1654 cpu_synchronize_state(cs
);
1655 /* env->nip is PC, so increment this by 4 to use
1656 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1659 cs
->exception_index
= POWERPC_EXCP_PROGRAM
;
1660 env
->error_code
= POWERPC_EXCP_INVAL
;
1661 ppc_cpu_do_interrupt(cs
);
1667 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1669 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1670 CPUPPCState
*env
= &cpu
->env
;
1673 qemu_mutex_lock_iothread();
1675 switch (run
->exit_reason
) {
1677 if (run
->dcr
.is_write
) {
1678 DPRINTF("handle dcr write\n");
1679 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1681 DPRINTF("handle dcr read\n");
1682 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1686 DPRINTF("handle halt\n");
1687 ret
= kvmppc_handle_halt(cpu
);
1689 #if defined(TARGET_PPC64)
1690 case KVM_EXIT_PAPR_HCALL
:
1691 DPRINTF("handle PAPR hypercall\n");
1692 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1694 run
->papr_hcall
.args
);
1699 DPRINTF("handle epr\n");
1700 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1703 case KVM_EXIT_WATCHDOG
:
1704 DPRINTF("handle watchdog expiry\n");
1705 watchdog_perform_action();
1709 case KVM_EXIT_DEBUG
:
1710 DPRINTF("handle debug exception\n");
1711 if (kvm_handle_debug(cpu
, run
)) {
1715 /* re-enter, this exception was guest-internal */
1720 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1725 qemu_mutex_unlock_iothread();
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}
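/*
 * Usage sketch (illustrative, not part of the original file): Book E
 * watchdog emulation is expected to enable the in-kernel watchdog once per
 * vCPU and then mirror guest timer-register updates through the helpers
 * above, roughly:
 *
 *     kvmppc_booke_watchdog_enable(cpu);
 *     kvmppc_set_tcr(cpu);                   after the guest writes TCR
 *     kvmppc_clear_tsr_bits(cpu, tsr_bits);  after the guest clears TSR bits
 */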
1792 static int read_cpuinfo(const char *field
, char *value
, int len
)
1796 int field_len
= strlen(field
);
1799 f
= fopen("/proc/cpuinfo", "r");
1805 if (!fgets(line
, sizeof(line
), f
)) {
1808 if (!strncmp(line
, field
, field_len
)) {
1809 pstrcpy(value
, len
, line
);
1820 uint32_t kvmppc_get_tbfreq(void)
1824 uint32_t retval
= NANOSECONDS_PER_SECOND
;
1826 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1830 if (!(ns
= strchr(line
, ':'))) {
1839 bool kvmppc_get_host_serial(char **value
)
1841 return g_file_get_contents("/proc/device-tree/system-id", value
, NULL
,
1845 bool kvmppc_get_host_model(char **value
)
1847 return g_file_get_contents("/proc/device-tree/model", value
, NULL
, NULL
);
1850 /* Try to find a device tree node for a CPU with clock-frequency property */
1851 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1853 struct dirent
*dirp
;
1856 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1857 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1862 while ((dirp
= readdir(dp
)) != NULL
) {
1864 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1866 f
= fopen(buf
, "r");
1868 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1875 if (buf
[0] == '\0') {
1876 printf("Unknown host!\n");
1883 static uint64_t kvmppc_read_int_dt(const char *filename
)
1892 f
= fopen(filename
, "rb");
1897 len
= fread(&u
, 1, sizeof(u
), f
);
1901 /* property is a 32-bit quantity */
1902 return be32_to_cpu(u
.v32
);
1904 return be64_to_cpu(u
.v64
);
1910 /* Read a CPU node property from the host device tree that's a single
1911 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1912 * (can't find or open the property, or doesn't understand the
1914 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1916 char buf
[PATH_MAX
], *tmp
;
1919 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1923 tmp
= g_strdup_printf("%s/%s", buf
, propname
);
1924 val
= kvmppc_read_int_dt(tmp
);
1930 uint64_t kvmppc_get_clockfreq(void)
1932 return kvmppc_read_int_cpu_dt("clock-frequency");
1935 uint32_t kvmppc_get_vmx(void)
1937 return kvmppc_read_int_cpu_dt("ibm,vmx");
1940 uint32_t kvmppc_get_dfp(void)
1942 return kvmppc_read_int_cpu_dt("ibm,dfp");
1945 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1947 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1948 CPUState
*cs
= CPU(cpu
);
1950 if (kvm_vm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1951 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1958 int kvmppc_get_hasidle(CPUPPCState
*env
)
1960 struct kvm_ppc_pvinfo pvinfo
;
1962 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1963 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 1;
}

static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}
2002 void kvmppc_enable_logical_ci_hcalls(void)
2005 * FIXME: it would be nice if we could detect the cases where
2006 * we're using a device which requires the in kernel
2007 * implementation of these hcalls, but the kernel lacks them and
2008 * produce a warning.
2010 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_LOAD
);
2011 kvmppc_enable_hcall(kvm_state
, H_LOGICAL_CI_STORE
);
2014 void kvmppc_enable_set_mode_hcall(void)
2016 kvmppc_enable_hcall(kvm_state
, H_SET_MODE
);
2019 void kvmppc_enable_clear_ref_mod_hcalls(void)
2021 kvmppc_enable_hcall(kvm_state
, H_CLEAR_REF
);
2022 kvmppc_enable_hcall(kvm_state
, H_CLEAR_MOD
);
2025 void kvmppc_set_papr(PowerPCCPU
*cpu
)
2027 CPUState
*cs
= CPU(cpu
);
2030 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
2032 error_report("This vCPU type or KVM version does not support PAPR");
2036 /* Update the capability flag so we sync the right information
2041 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t compat_pvr
)
2043 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &compat_pvr
);
2046 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
2048 CPUState
*cs
= CPU(cpu
);
2051 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
2052 if (ret
&& mpic_proxy
) {
2053 error_report("This KVM version does not support EPR");
2058 int kvmppc_smt_threads(void)
2060 return cap_ppc_smt
? cap_ppc_smt
: 1;
2064 off_t
kvmppc_alloc_rma(void **rma
)
2068 struct kvm_allocate_rma ret
;
2070 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2071 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2072 * not necessary on this hardware
2073 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2075 * FIXME: We should allow the user to force contiguous RMA
2076 * allocation in the cap_ppc_rma==1 case.
2078 if (cap_ppc_rma
< 2) {
2082 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
2084 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2089 size
= MIN(ret
.rma_size
, 256ul << 20);
2091 *rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2092 if (*rma
== MAP_FAILED
) {
2093 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
2100 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
2102 struct kvm_ppc_smmu_info info
;
2103 long rampagesize
, best_page_shift
;
2106 if (cap_ppc_rma
>= 2) {
2107 return current_size
;
2110 /* Find the largest hardware supported page size that's less than
2111 * or equal to the (logical) backing page size of guest RAM */
2112 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
2113 rampagesize
= qemu_getrampagesize();
2114 best_page_shift
= 0;
2116 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
2117 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
2119 if (!sps
->page_shift
) {
2123 if ((sps
->page_shift
> best_page_shift
)
2124 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
2125 best_page_shift
= sps
->page_shift
;
2129 return MIN(current_size
,
2130 1ULL << (best_page_shift
+ hash_shift
- 7));
2134 bool kvmppc_spapr_use_multitce(void)
2136 return cap_spapr_multitce
;
2139 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
2142 struct kvm_create_spapr_tce args
= {
2144 .window_size
= window_size
,
2150 /* Must set fd to -1 so we don't try to munmap when called for
2151 * destroying the table, which the upper layers -will- do
2154 if (!cap_spapr_tce
|| (need_vfio
&& !cap_spapr_vfio
)) {
2158 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
2160 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
2165 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
2166 /* FIXME: round this up to page size */
2168 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
2169 if (table
== MAP_FAILED
) {
2170 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
2180 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
2188 len
= nb_table
* sizeof(uint64_t);
2189 if ((munmap(table
, len
) < 0) ||
2191 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
2193 /* Leak the table */
2199 int kvmppc_reset_htab(int shift_hint
)
2201 uint32_t shift
= shift_hint
;
2203 if (!kvm_enabled()) {
2204 /* Full emulation, tell caller to allocate htab itself */
2207 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
2209 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
2210 if (ret
== -ENOTTY
) {
2211 /* At least some versions of PR KVM advertise the
2212 * capability, but don't implement the ioctl(). Oops.
2213 * Return 0 so that we allocate the htab in qemu, as is
2214 * correct for PR. */
2216 } else if (ret
< 0) {
2222 /* We have a kernel that predates the htab reset calls. For PR
2223 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2224 * this era, it has allocated a 16MB fixed size hash table already. */
2225 if (kvmppc_is_pr(kvm_state
)) {
2226 /* PR - tell caller to allocate htab */
2229 /* HV - assume 16MB kernel allocated htab */
2234 static inline uint32_t mfpvr(void)
2243 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
2252 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
2254 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
2255 uint32_t vmx
= kvmppc_get_vmx();
2256 uint32_t dfp
= kvmppc_get_dfp();
2257 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
2258 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
2260 /* Now fix up the class with information we can query from the host */
2264 /* Only override when we know what the host supports */
2265 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
2266 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
2269 /* Only override when we know what the host supports */
2270 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
2273 if (dcache_size
!= -1) {
2274 pcc
->l1_dcache_size
= dcache_size
;
2277 if (icache_size
!= -1) {
2278 pcc
->l1_icache_size
= icache_size
;
2282 bool kvmppc_has_cap_epr(void)
2287 bool kvmppc_has_cap_htab_fd(void)
2292 bool kvmppc_has_cap_fixup_hcalls(void)
2294 return cap_fixup_hcalls
;
2297 bool kvmppc_has_cap_htm(void)
2302 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
2304 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
2306 while (oc
&& !object_class_is_abstract(oc
)) {
2307 oc
= object_class_get_parent(oc
);
2311 return POWERPC_CPU_CLASS(oc
);
2314 PowerPCCPUClass
*kvm_ppc_get_host_cpu_class(void)
2316 uint32_t host_pvr
= mfpvr();
2317 PowerPCCPUClass
*pvr_pcc
;
2319 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
2320 if (pvr_pcc
== NULL
) {
2321 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
2327 static int kvm_ppc_register_host_cpu_type(void)
2329 TypeInfo type_info
= {
2330 .name
= TYPE_HOST_POWERPC_CPU
,
2331 .class_init
= kvmppc_host_cpu_class_init
,
2333 PowerPCCPUClass
*pvr_pcc
;
2337 pvr_pcc
= kvm_ppc_get_host_cpu_class();
2338 if (pvr_pcc
== NULL
) {
2341 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
2342 type_register(&type_info
);
2344 #if defined(TARGET_PPC64)
2345 type_info
.name
= g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE
, "host");
2346 type_info
.parent
= TYPE_SPAPR_CPU_CORE
,
2347 type_info
.instance_size
= sizeof(sPAPRCPUCore
);
2348 type_info
.instance_init
= NULL
;
2349 type_info
.class_init
= spapr_cpu_core_class_init
;
2350 type_info
.class_data
= (void *) "host";
2351 type_register(&type_info
);
2352 g_free((void *)type_info
.name
);
2356 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2357 * we want "POWER8" to be a "family" alias that points to the current
2358 * host CPU type, too)
2360 dc
= DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc
));
2361 for (i
= 0; ppc_cpu_aliases
[i
].alias
!= NULL
; i
++) {
2362 if (strcmp(ppc_cpu_aliases
[i
].alias
, dc
->desc
) == 0) {
2363 ObjectClass
*oc
= OBJECT_CLASS(pvr_pcc
);
2366 ppc_cpu_aliases
[i
].model
= g_strdup(object_class_get_name(oc
));
2367 suffix
= strstr(ppc_cpu_aliases
[i
].model
, "-"TYPE_POWERPC_CPU
);
2371 ppc_cpu_aliases
[i
].oc
= oc
;
2379 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
2381 struct kvm_rtas_token_args args
= {
2385 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
2389 strncpy(args
.name
, function
, sizeof(args
.name
));
2391 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
2394 int kvmppc_get_htab_fd(bool write
)
2396 struct kvm_get_htab_fd s
= {
2397 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
2402 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
2406 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
2409 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
2411 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
2412 uint8_t buf
[bufsize
];
2416 rc
= read(fd
, buf
, bufsize
);
2418 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
2422 uint8_t *buffer
= buf
;
2425 struct kvm_get_htab_header
*head
=
2426 (struct kvm_get_htab_header
*) buffer
;
2427 size_t chunksize
= sizeof(*head
) +
2428 HASH_PTE_SIZE_64
* head
->n_valid
;
2430 qemu_put_be32(f
, head
->index
);
2431 qemu_put_be16(f
, head
->n_valid
);
2432 qemu_put_be16(f
, head
->n_invalid
);
2433 qemu_put_buffer(f
, (void *)(head
+ 1),
2434 HASH_PTE_SIZE_64
* head
->n_valid
);
2436 buffer
+= chunksize
;
2442 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
2444 return (rc
== 0) ? 1 : 0;
2447 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
2448 uint16_t n_valid
, uint16_t n_invalid
)
2450 struct kvm_get_htab_header
*buf
;
2451 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
2454 buf
= alloca(chunksize
);
2456 buf
->n_valid
= n_valid
;
2457 buf
->n_invalid
= n_invalid
;
2459 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
2461 rc
= write(fd
, buf
, chunksize
);
2463 fprintf(stderr
, "Error writing KVM hash table: %s\n",
2467 if (rc
!= chunksize
) {
2468 /* We should never get a short write on a single chunk */
2469 fprintf(stderr
, "Short write, restoring KVM hash table\n");
2475 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
2480 void kvm_arch_init_irq_routing(KVMState
*s
)
2484 void kvmppc_read_hptes(ppc_hash_pte64_t
*hptes
, hwaddr ptex
, int n
)
2486 struct kvm_get_htab_fd ghf
= {
2488 .start_index
= ptex
,
2493 fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2495 hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2500 struct kvm_get_htab_header
*hdr
;
2501 int m
= n
< HPTES_PER_GROUP
? n
: HPTES_PER_GROUP
;
2502 char buf
[sizeof(*hdr
) + m
* HASH_PTE_SIZE_64
];
2504 rc
= read(fd
, buf
, sizeof(buf
));
2506 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2509 hdr
= (struct kvm_get_htab_header
*)buf
;
2510 while ((i
< n
) && ((char *)hdr
< (buf
+ rc
))) {
2511 int invalid
= hdr
->n_invalid
;
2513 if (hdr
->index
!= (ptex
+ i
)) {
2514 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2515 " != (%"HWADDR_PRIu
" + %d", hdr
->index
, ptex
, i
);
2518 memcpy(hptes
+ i
, hdr
+ 1, HASH_PTE_SIZE_64
* hdr
->n_valid
);
2521 if ((n
- i
) < invalid
) {
2524 memset(hptes
+ i
, 0, invalid
* HASH_PTE_SIZE_64
);
2525 i
+= hdr
->n_invalid
;
2527 hdr
= (struct kvm_get_htab_header
*)
2528 ((char *)(hdr
+ 1) + HASH_PTE_SIZE_64
* hdr
->n_valid
);
2535 void kvmppc_write_hpte(hwaddr ptex
, uint64_t pte0
, uint64_t pte1
)
2538 struct kvm_get_htab_fd ghf
;
2540 struct kvm_get_htab_header hdr
;
2546 ghf
.start_index
= 0; /* Ignored */
2547 fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2549 hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2552 buf
.hdr
.n_valid
= 1;
2553 buf
.hdr
.n_invalid
= 0;
2554 buf
.hdr
.index
= ptex
;
2555 buf
.pte0
= cpu_to_be64(pte0
);
2556 buf
.pte1
= cpu_to_be64(pte1
);
2558 rc
= write(fd
, &buf
, sizeof(buf
));
2559 if (rc
!= sizeof(buf
)) {
2560 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2565 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry
*route
,
2566 uint64_t address
, uint32_t data
, PCIDevice
*dev
)
2571 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry
*route
,
2572 int vector
, PCIDevice
*dev
)
2577 int kvm_arch_release_virq_post(int virq
)
2582 int kvm_arch_msi_data_to_gsi(uint32_t data
)
2584 return data
& 0xffff;
2587 int kvmppc_enable_hwrng(void)
2589 if (!kvm_enabled() || !kvm_check_extension(kvm_state
, KVM_CAP_PPC_HWRNG
)) {
2593 return kvmppc_enable_hcall(kvm_state
, H_RANDOM
);