/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-timer.h"
#include "sysemu.h"
#include "kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "cpus.h"
#include "device_tree.h"
#include "hw/sysbus.h"
#include "hw/spapr.h"
#include "hw/spapr_vio.h"
#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}
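/* kvm_kick_cpu() is the callback for idle_timer above: kvm_arch_pre_run()
 * rearms the timer after every injected external interrupt, so a guest that
 * swallowed a level-triggered interrupt is forced out of its nap and the
 * pending interrupt is re-evaluated (see the workaround comment above). */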
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    return 0;
}
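/* Push the guest's PVR to KVM via SET_SREGS so the kernel models the right
 * CPU. On BookE KVM the host PVR is used unmodified, so nothing is done. */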
static int kvm_arch_sync_sregs(CPUPPCState *cenv)
{
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially mislead users into thinking they
           can run BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else if (!cap_segstate) {
        fprintf(stderr, "kvm error: missing PVR setting capability\n");
        return -ENOSYS;
    }

    ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(CPUPPCState *env)
{
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(CPUPPCState *env,
                                       struct kvm_ppc_smmu_info *info)
{
    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR KVM: no backing store restrictions */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
{
    int ret;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(env, info);
}
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
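/* Filter the segment/page size encodings reported by KVM down to those the
 * RAM backing store can actually provide, and store the result in env->sps
 * for the guest-visible MMU model. */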
static void kvm_fixup_page_sizes(CPUPPCState *env)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(env, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(CPUPPCState *env)
{
}

#endif /* !defined (TARGET_PPC64) */
int kvm_arch_init_vcpu(CPUPPCState *cenv)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cenv);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cenv);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cenv);
        break;
    default:
        break;
    }

    return ret;
}
void kvm_arch_reset_vcpu(CPUPPCState *env)
{
}
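/* Push the complete shadow TLB back to KVM: every entry in the shared array
 * is marked dirty in the bitmap, so the kernel reloads all of them. */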
static void kvm_sw_tlb_put(CPUPPCState *env)
{
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
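/* Copy QEMU's CPU state into the kernel. Fast-changing register state is
 * written unconditionally; slow-changing state (sregs, HIOR) is only pushed
 * when 'level' indicates at least a reset. */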
int kvm_arch_put_registers(CPUPPCState *env, int level)
{
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(env);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        uint64_t hior = env->spr[SPR_HIOR];
        struct kvm_one_reg reg = {
            .id = KVM_REG_PPC_HIOR,
            .addr = (uintptr_t) &hior,
        };

        ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    return ret;
}
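/* Fetch the vcpu state from KVM back into env. Which sregs layout is read
 * depends on the available capability: BookE sregs (cap_booke_sregs) or
 * Book3S segment state (cap_segstate). */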
int kvm_arch_get_registers(CPUPPCState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}
int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);

    return 0;
}
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
{
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", env->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUPPCState *env)
{
    return 0;
}
static int kvmppc_handle_halt(CPUPPCState *env)
{
    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}
/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
{
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(ppc_env_get_cpu(env),
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
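/* Scan /proc/cpuinfo for a line starting with 'field' and copy that whole
 * line into 'value'. Returns 0 when the field is found, -1 otherwise. */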
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
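/* Timebase frequency as reported by the host kernel's "timebase" line in
 * /proc/cpuinfo; falls back to get_ticks_per_sec() when it can't be parsed. */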
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        *buf = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;

    struct kvm_ppc_pvinfo pvinfo;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);

        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}
void kvmppc_set_papr(CPUPPCState *env)
{
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }
}
int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
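/* Allocate a contiguous Real Mode Area through KVM and map it into the
 * system address space. Returns the size actually used, 0 when no special
 * allocation is needed on this KVM/hardware, or -1 on failure. */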
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
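/* Clamp the requested RMA size to what the hash table can reasonably cover
 * when KVM does not itself require a contiguous RMA. As a worked example
 * (assuming a 64 kB backing page size and hash_shift == 24, i.e. a 16 MB
 * HTAB), the limit works out to 64 kB << 17 = 8 GB. */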
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    if (cap_ppc_rma >= 2) {
        return current_size;
    }
    return MIN(current_size,
               getrampagesize() << (hash_shift - 7));
}
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
, int fd
, uint32_t window_size
)
1120 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
)*sizeof(sPAPRTCE
);
1121 if ((munmap(table
, len
) < 0) ||
1123 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1125 /* Leak the table */
1131 int kvmppc_reset_htab(int shift_hint
)
1133 uint32_t shift
= shift_hint
;
1135 if (!kvm_enabled()) {
1136 /* Full emulation, tell caller to allocate htab itself */
1139 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1141 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1142 if (ret
== -ENOTTY
) {
1143 /* At least some versions of PR KVM advertise the
1144 * capability, but don't implement the ioctl(). Oops.
1145 * Return 0 so that we allocate the htab in qemu, as is
1146 * correct for PR. */
1148 } else if (ret
< 0) {
1154 /* We have a kernel that predates the htab reset calls. For PR
1155 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1156 * this era, it has allocated a 16MB fixed size hash table
1157 * already. Kernels of this era have the GET_PVINFO capability
1158 * only on PR, so we use this hack to determine the right
1160 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1161 /* PR - tell caller to allocate htab */
1164 /* HV - assume 16MB kernel allocated htab */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
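/* Build a CPU definition describing the host: start from the spec matching
 * the host PVR, then override the Altivec/VSX/DFP instruction flags based
 * on what the host device tree advertises. */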
const ppc_def_t *kvmppc_host_cpu_def(void)
{
    uint32_t host_pvr = mfpvr();
    const ppc_def_t *base_spec;
    ppc_def_t *spec;
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    base_spec = ppc_find_by_pvr(host_pvr);

    spec = g_malloc0(sizeof(*spec));
    memcpy(spec, base_spec, sizeof(*spec));

    /* Now fix up the spec with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
    }

    return spec;
}
int kvmppc_fixup_cpu(CPUPPCState *env)
{
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    env->cpu_index = (env->cpu_index / smp_threads) * smt
                     + (env->cpu_index % smp_threads);

    return 0;
}
bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}