target-ppc/kvm.c
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "hw/sysbus.h"
#include "hw/spapr.h"
#include "hw/spapr_vio.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}
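/* Probe the KVM capabilities this file relies on once, at accelerator init
 * time, so the helpers below can simply test the cached values. */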
int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    return 0;
}
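/* On BookE we keep the host's native PVR; otherwise the PVR of the modelled
 * CPU is pushed into the kernel vcpu via KVM_SET_SREGS. */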
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}
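/* Size of the pages backing guest RAM: the hugepage size when -mem-path
 * points at a hugetlbfs mount, the normal host page size otherwise. */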
static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
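/* Filter the page sizes advertised by the CPU model down to those the kernel
 * reports (or we guessed above) and that fit the RAM backing store. */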
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}
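/* Push the complete shadow TLB back to KVM by marking every entry dirty. */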
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
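/* Copy the QEMU-side CPU state into the kernel vcpu. */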
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = env->xer;
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++) {
        regs.gpr[i] = env->gpr[i];
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        uint64_t hior = env->spr[SPR_HIOR];
        struct kvm_one_reg reg = {
            .id = KVM_REG_PPC_HIOR,
            .addr = (uintptr_t) &hior,
        };

        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    return ret;
}
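/* Read the kernel vcpu state back into the QEMU-side CPU structures. */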
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    env->xer = regs.xer;
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++) {
        env->gpr[i] = regs.gpr[i];
    }

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                          sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    return 0;
}
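/* Raise or lower the external interrupt pin of a vcpu through KVM. */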
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1 << PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    return cpu->env.halted;
}

static int kvmppc_handle_halt(CPUPPCState *env)
{
    if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        env->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
    }

    return 0;
}
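/* Dispatch the KVM exit reasons this port understands (DCR access, halt,
 * PAPR hypercalls, EPR reads); anything else is reported as an error. */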
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(env);
        break;
#ifdef CONFIG_PSERIES
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        dprintf("handle epr\n");
        run->epr.epr = ldl_phys(env->mpic_iack);
        ret = 0;
        break;
    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
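/* Find the first /proc/cpuinfo line starting with 'field' and copy it into
 * 'value'. Returns 0 on success, -1 if the field wasn't found. */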
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
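/* Timebase frequency of the host, parsed from /proc/cpuinfo; falls back to
 * get_ticks_per_sec() if it can't be determined. */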
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit).  Returns -1 if the node or property can't
 * be found or opened, and 0 if the property size isn't understood. */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t *)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}
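/* Switch the vcpu into PAPR (pseries) mode; a pseries guest cannot run on a
 * kernel that lacks KVM_CAP_PPC_PAPR, so this aborts on failure. */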
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
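/* Allocate a contiguous Real Mode Area from the kernel (when the hardware
 * requires one) and map it into system memory at guest physical address 0. */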
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    if (cap_ppc_rma >= 2) {
        return current_size;
    }
    return MIN(current_size,
               getrampagesize() << (hash_shift - 7));
}
#endif
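/* Create an in-kernel TCE table for a sPAPR IOMMU window and mmap() it so
 * QEMU can read the mappings directly; returns NULL (with *pfd = -1) when
 * the kernel TCE acceleration is unavailable or setup fails. */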
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(obj);

    assert(kvm_enabled());

    if (pcc->info->pvr != mfpvr()) {
        fprintf(stderr, "Your host CPU is unsupported.\n"
                "Please choose a supported model instead, see -cpu ?.\n");
        exit(1);
    }
}
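/* Build the "host" CPU class: start from the class matching the host's PVR
 * and adjust the Altivec/VSX/DFP instruction flags from the device tree. */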
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;
    ppc_def_t *spec;
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();

    spec = g_malloc0(sizeof(*spec));

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc != NULL) {
        memcpy(spec, pvr_pcc->info, sizeof(*spec));
    }
    pcc->info = spec;
    /* Override the display name for -cpu ? and QMP */
    pcc->info->name = "host";

    /* Now fix up the spec with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
    }
}

int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
                    + (cs->cpu_index % smp_threads);

    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

static const TypeInfo kvm_host_cpu_type_info = {
    .name = TYPE_HOST_POWERPC_CPU,
    .parent = TYPE_POWERPC_CPU,
    .instance_init = kvmppc_host_cpu_initfn,
    .class_init = kvmppc_host_cpu_class_init,
};

static void kvm_ppc_register_types(void)
{
    type_register_static(&kvm_host_cpu_type_info);
}

type_init(kvm_ppc_register_types)