/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu-timer.h"
#include "sysemu.h"
#include "kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "cpus.h"
#include "device_tree.h"
#include "hw/sysbus.h"
#include "hw/spapr.h"
#include "hw/spapr_vio.h"
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */

static QEMUTimer *idle_timer;
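/* kvm_kick_env() is the idle_timer callback: kicking the vcpu forces an exit
 * from the guest, giving us another chance to reinject an interrupt the guest
 * swallowed. The timer is armed in kvm_arch_pre_run() below. */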
static void kvm_kick_env(void *env)
{
    qemu_cpu_kick(env);
}
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    return 0;
}
static int kvm_arch_sync_sregs(CPUPPCState *cenv)
{
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we may mislead users into thinking they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(CPUPPCState *env)
{
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
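/* Once KVM_CAP_SW_TLB is enabled, env->tlb.tlbm is shared with the kernel:
 * KVM works on the array in place, and kvm_sw_tlb_put() below flushes
 * QEMU-side modifications back via KVM_DIRTY_TLB. */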
#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(CPUPPCState *env,
                                       struct kvm_ppc_smmu_info *info)
{
    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
{
    int ret;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(env, info);
}
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
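/* Example (host-dependent): with -mem-path pointing at a hugetlbfs mount
 * using 16 MiB huge pages, this returns 16777216; without -mem-path it is
 * just the host's base page size. */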
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
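/* Worked example: with KVM_PPC_PAGE_SIZES_REAL set and guest RAM backed by
 * 64 KiB pages (rampgsize == 65536), a 64 KiB page size (shift == 16) passes
 * since (1ul << 16) <= 65536, while 16 MiB (shift == 24) is rejected. */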
static void kvm_fixup_page_sizes(CPUPPCState *env)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(env, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
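/* After this fixup, env->sps only advertises segment/page size combinations
 * that both the kernel reported and the backing store can honour, so the
 * guest is never offered a page size it cannot actually use. */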
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(CPUPPCState *env)
{
}

#endif /* !defined (TARGET_PPC64) */
int kvm_arch_init_vcpu(CPUPPCState *cenv)
{
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cenv);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cenv);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cenv);
        break;
    default:
        break;
    }

    return ret;
}
void kvm_arch_reset_vcpu(CPUPPCState *env)
{
}
static void kvm_sw_tlb_put(CPUPPCState *env)
{
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
int kvm_arch_put_registers(CPUPPCState *env, int level)
{
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(env);
        env->tlb_dirty = false;
    }

    return ret;
}
int kvm_arch_get_registers(CPUPPCState *env)
{
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}
int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);

    return 0;
}
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
{
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", env->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}
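/* Note: get_ticks_per_sec() / 50 above is the 20 ms rearm interval promised
 * by the workaround comment at the top of this file (1 s / 50 = 20 ms). */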
void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUPPCState *env)
{
    return 0;
}
static int kvmppc_handle_halt(CPUPPCState *env)
{
    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}
/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read from unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
{
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            strncpy(value, line, len);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
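/* The /proc/cpuinfo line being parsed looks like (value is host-specific):
 *     timebase        : 512000000
 * everything after the ':' is handed to atoi(). */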
/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns -1 if it can't find or open the
 * property, and 0 if it doesn't understand the format */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
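/* A typical path assembled here would be, e.g.,
 *     /proc/device-tree/cpus/PowerPC,POWER7@0/clock-frequency
 * though the CPU node name is host-dependent. */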
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;

    struct kvm_ppc_pvinfo pvinfo;

    if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);

        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}
void kvmppc_set_papr(CPUPPCState *env)
{
    struct kvm_enable_cap cap = {};
    struct kvm_one_reg reg = {};
    struct kvm_sregs sregs = {};
    int ret;
    uint64_t hior = env->spr[SPR_HIOR];

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);

    if (ret) {
        goto fail;
    }

    /*
     * XXX We set HIOR here. It really should be a qdev property of
     *     the CPU node, but we don't have CPUs converted to qdev yet.
     *
     *     Once we have qdev CPUs, move HIOR to a qdev property and
     *     remove this chunk.
     */
    reg.id = KVM_REG_PPC_HIOR;
    reg.addr = (uintptr_t)&hior;
    ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
    if (ret) {
        fprintf(stderr, "Couldn't set HIOR. Maybe you're running an old\n"
                        "kernel with support for HV KVM but no PAPR PR\n"
                        "KVM, in which case things will work. If they don't,\n"
                        "please update your host kernel!\n");
    }

    /* Set SDR1 so kernel space finds the HTAB */
    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
    if (ret) {
        goto fail;
    }

    return;

fail:
    cpu_abort(env, "This KVM version does not support PAPR\n");
}
int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
, uint32_t window_size
, int *pfd
)
1047 struct kvm_create_spapr_tce args
= {
1049 .window_size
= window_size
,
1055 /* Must set fd to -1 so we don't try to munmap when called for
1056 * destroying the table, which the upper layers -will- do
1059 if (!cap_spapr_tce
) {
1063 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1065 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1070 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(sPAPRTCE
);
1071 /* FIXME: round this up to page size */
1073 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1074 if (table
== MAP_FAILED
) {
1075 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
const ppc_def_t *kvmppc_host_cpu_def(void)
{
    uint32_t host_pvr = mfpvr();
    const ppc_def_t *base_spec;
    ppc_def_t *spec;
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    base_spec = ppc_find_by_pvr(host_pvr);

    spec = g_malloc0(sizeof(*spec));
    memcpy(spec, base_spec, sizeof(*spec));

    /* Now fix up the spec with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
    }

    return spec;
}
int kvmppc_fixup_cpu(CPUPPCState *env)
{
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    env->cpu_index = (env->cpu_index / smp_threads) * smt
        + (env->cpu_index % smp_threads);

    return 0;
}
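/* Worked example: with smp_threads == 2 guest threads per core and a host
 * SMT mode of 4, guest cpu_index 3 (core 1, thread 1) maps to
 * (3 / 2) * 4 + (3 % 2) == 5, i.e. thread 1 of the second host core. */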
bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}