Merge commit 'c9159fe9aa9abe24115ea4d16127179e9cb07e22' into upstream-merge
[qemu-kvm.git] / target-ppc / kvm.c
blob7f6e4e0b8706a3d83a1129a985f83b08500580af
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu-timer.h"
27 #include "sysemu.h"
28 #include "kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "cpus.h"
32 #include "device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
40 //#define DEBUG_KVM
/* Debug tracing: dprintf() expands to nothing unless DEBUG_KVM is
 * enabled above, so release builds pay no cost. */
42 #ifdef DEBUG_KVM
43 #define dprintf(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define dprintf(fmt, ...) \
47 do { } while (0)
48 #endif
/* Host procfs view of the device tree's CPU nodes (trailing slash
 * expected by the snprintf users below). */
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
/* The PPC target has no mandatory KVM capabilities beyond the core set. */
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
/* Optional host kernel capabilities, probed once in kvm_arch_init(). */
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
64 /* XXX We have a race condition where we actually have a level triggered
65 * interrupt, but the infrastructure can't expose that yet, so the guest
66 * takes but ignores it, goes to sleep and never gets notified that there's
67 * still an interrupt pending.
69 * As a quick workaround, let's just wake up again 20 ms after we injected
70 * an interrupt. That way we can assure that we're always reinjecting
71 * interrupts in case the guest swallowed them.
/* Re-armed from kvm_arch_pre_run() after every manual injection. */
73 static QEMUTimer *idle_timer;
/* QEMUTimer callback for idle_timer: wake the vcpu so that a level
 * interrupt the guest swallowed gets another chance at delivery. */
static void kvm_kick_env(void *opaque)
{
    qemu_cpu_kick(opaque);
}
/* Probe the host kernel's optional PPC KVM capabilities.  Always
 * returns 0; missing capabilities only degrade behaviour. */
80 int kvm_arch_init(KVMState *s)
82 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
83 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
84 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
85 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
86 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
87 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
88 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
/* Without level-irq support only the idle_timer workaround keeps
 * level interrupts from being lost (see comment above idle_timer). */
90 if (!cap_interrupt_level) {
91 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
92 "VM to stall at times!\n");
95 return 0;
/* Push the PVR (and only the PVR) to the kernel via the sregs
 * interface.  Returns 0 on success or a negative errno value. */
98 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
100 struct kvm_sregs sregs;
101 int ret;
103 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
104 /* What we're really trying to say is "if we're on BookE, we use
105 the native PVR for now". This is the only sane way to check
106 it though, so we potentially confuse users that they can run
107 BookE guests on BookS. Let's hope nobody dares enough :) */
108 return 0;
109 } else {
110 if (!cap_segstate) {
111 fprintf(stderr, "kvm error: missing PVR setting capability\n");
112 return -ENOSYS;
/* Read-modify-write so every sregs field except pvr is preserved. */
116 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
117 if (ret) {
118 return ret;
121 sregs.pvr = cenv->spr[SPR_PVR];
122 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
125 /* Set up a shared TLB array with KVM */
/* Returns 0 on success (or when SW TLB sharing is unavailable, which
 * is not an error), negative errno on ioctl failure. */
126 static int kvm_booke206_tlb_init(CPUPPCState *env)
128 struct kvm_book3e_206_tlb_params params = {};
129 struct kvm_config_tlb cfg = {};
130 struct kvm_enable_cap encap = {};
131 unsigned int entries = 0;
132 int ret, i;
134 if (!kvm_enabled() ||
135 !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
136 return 0;
139 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
141 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
142 params.tlb_sizes[i] = booke206_tlb_size(env, i);
143 params.tlb_ways[i] = booke206_tlb_ways(env, i);
144 entries += params.tlb_sizes[i];
147 assert(entries == env->nb_tlb);
/* The kernel accesses env->tlb.tlbm directly, so QEMU's and KVM's TLB
 * entry layouts must match exactly. */
148 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
150 env->tlb_dirty = true;
152 cfg.array = (uintptr_t)env->tlb.tlbm;
153 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
154 cfg.params = (uintptr_t)&params;
155 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
157 encap.cap = KVM_CAP_SW_TLB;
158 encap.args[0] = (uintptr_t)&cfg;
160 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
161 if (ret < 0) {
162 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
163 __func__, strerror(-ret));
164 return ret;
167 env->kvm_sw_tlb = true;
168 return 0;
172 #if defined(TARGET_PPC64)
/* Fill *info with a conservative guess of the host MMU's supported
 * page/segment sizes, for kernels without KVM_PPC_GET_SMMU_INFO. */
173 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
174 struct kvm_ppc_smmu_info *info)
176 memset(info, 0, sizeof(*info));
178 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
179 * need to "guess" what the supported page sizes are.
181 * For that to work we make a few assumptions:
183 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
184 * KVM which only supports 4K and 16M pages, but supports them
185 * regardless of the backing store characteritics. We also don't
186 * support 1T segments.
188 * This is safe as if HV KVM ever supports that capability or PR
189 * KVM grows supports for more page/segment sizes, those versions
190 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
191 * will not hit this fallback
193 * - Else we are running HV KVM. This means we only support page
194 * sizes that fit in the backing store. Additionally we only
195 * advertize 64K pages if the processor is ARCH 2.06 and we assume
196 * P7 encodings for the SLB and hash table. Here too, we assume
197 * support for any newer processor will mean a kernel that
198 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
199 * this fallback.
201 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
202 /* No flags */
203 info->flags = 0;
204 info->slb_size = 64;
206 /* Standard 4k base page size segment */
207 info->sps[0].page_shift = 12;
208 info->sps[0].slb_enc = 0;
209 info->sps[0].enc[0].page_shift = 12;
210 info->sps[0].enc[0].pte_enc = 0;
212 /* Standard 16M large page size segment */
213 info->sps[1].page_shift = 24;
214 info->sps[1].slb_enc = SLB_VSID_L;
215 info->sps[1].enc[0].page_shift = 24;
216 info->sps[1].enc[0].pte_enc = 0;
217 } else {
218 int i = 0;
220 /* HV KVM has backing store size restrictions */
221 info->flags = KVM_PPC_PAGE_SIZES_REAL;
223 if (env->mmu_model & POWERPC_MMU_1TSEG) {
224 info->flags |= KVM_PPC_1T_SEGMENTS;
227 if (env->mmu_model == POWERPC_MMU_2_06) {
228 info->slb_size = 32;
229 } else {
230 info->slb_size = 64;
233 /* Standard 4k base page size segment */
234 info->sps[i].page_shift = 12;
235 info->sps[i].slb_enc = 0;
236 info->sps[i].enc[0].page_shift = 12;
237 info->sps[i].enc[0].pte_enc = 0;
238 i++;
240 /* 64K on MMU 2.06 */
241 if (env->mmu_model == POWERPC_MMU_2_06) {
242 info->sps[i].page_shift = 16;
243 info->sps[i].slb_enc = 0x110;
244 info->sps[i].enc[0].page_shift = 16;
245 info->sps[i].enc[0].pte_enc = 1;
246 i++;
249 /* Standard 16M large page size segment */
250 info->sps[i].page_shift = 24;
251 info->sps[i].slb_enc = SLB_VSID_L;
252 info->sps[i].enc[0].page_shift = 24;
253 info->sps[i].enc[0].pte_enc = 0;
257 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
259 int ret;
261 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
262 ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
263 if (ret == 0) {
264 return;
268 kvm_get_fallback_smmu_info(env, info);
/* Return the page size backing guest RAM: the normal host page size
 * for anonymous memory, or the filesystem block size when -mem-path
 * points at a hugetlbfs mount. */
271 static long getrampagesize(void)
273 struct statfs fs;
274 int ret;
276 if (!mem_path) {
277 /* guest RAM is backed by normal anonymous pages */
278 return getpagesize();
/* statfs() can be interrupted by signals; retry on EINTR. */
281 do {
282 ret = statfs(mem_path, &fs);
283 } while (ret != 0 && errno == EINTR);
285 if (ret != 0) {
286 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
287 strerror(errno));
288 exit(1);
291 #define HUGETLBFS_MAGIC 0x958458f6
293 if (fs.f_type != HUGETLBFS_MAGIC) {
294 /* Explicit mempath, but it's ordinary pages */
295 return getpagesize();
298 /* It's hugepage, return the huge page size */
299 return fs.f_bsize;
302 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
304 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
305 return true;
308 return (1ul << shift) <= rampgsize;
/* Restrict env->sps (supported page sizes) and the SLB size to what
 * the host kernel and RAM backing actually support.  64-bit server
 * MMUs only; smmu_info is cached across vcpus via function statics. */
311 static void kvm_fixup_page_sizes(CPUPPCState *env)
313 static struct kvm_ppc_smmu_info smmu_info;
314 static bool has_smmu_info;
315 long rampagesize;
316 int iq, ik, jq, jk;
318 /* We only handle page sizes for 64-bit server guests for now */
319 if (!(env->mmu_model & POWERPC_MMU_64)) {
320 return;
323 /* Collect MMU info from kernel if not already */
324 if (!has_smmu_info) {
325 kvm_get_smmu_info(env, &smmu_info);
326 has_smmu_info = true;
329 rampagesize = getrampagesize();
331 /* Convert to QEMU form */
332 memset(&env->sps, 0, sizeof(env->sps));
/* ik/jk index the kernel arrays, iq/jq the (possibly shorter) QEMU
 * arrays; unusable sizes are skipped, compacting the QEMU lists. */
334 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
335 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
336 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
338 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
339 ksps->page_shift)) {
340 continue;
342 qsps->page_shift = ksps->page_shift;
343 qsps->slb_enc = ksps->slb_enc;
344 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
345 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
346 ksps->enc[jk].page_shift)) {
347 continue;
349 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
350 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
351 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
352 break;
355 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
356 break;
359 env->slb_nr = smmu_info.slb_size;
/* Mirror the kernel's 1T segment support into the QEMU MMU model. */
360 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
361 env->mmu_model |= POWERPC_MMU_1TSEG;
362 } else {
363 env->mmu_model &= ~POWERPC_MMU_1TSEG;
366 #else /* defined (TARGET_PPC64) */
/* Non-64-bit targets: no server MMU page size information to adjust. */
368 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
372 #endif /* !defined (TARGET_PPC64) */
/* Per-vcpu initialisation: fix up page sizes, push the PVR, create
 * the idle wakeup timer, and (BookE 2.06) share the TLB with the
 * kernel.  Returns 0 on success or a negative errno value. */
374 int kvm_arch_init_vcpu(CPUPPCState *cenv)
376 int ret;
378 /* Gather server mmu info from KVM and update the CPU state */
379 kvm_fixup_page_sizes(cenv);
381 /* Synchronize sregs with kvm */
382 ret = kvm_arch_sync_sregs(cenv);
383 if (ret) {
384 return ret;
387 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
389 /* Some targets support access to KVM's guest TLB. */
390 switch (cenv->mmu_model) {
391 case POWERPC_MMU_BOOKE206:
392 ret = kvm_booke206_tlb_init(cenv);
393 break;
394 default:
395 break;
398 return ret;
/* No per-vcpu reset work is needed on PPC. */
401 void kvm_arch_reset_vcpu(CPUPPCState *env)
/* Flush the whole software TLB back to the kernel by marking every
 * entry dirty; KVM then re-reads the shared array set up in
 * kvm_booke206_tlb_init().  No-op unless SW TLB sharing is active. */
405 static void kvm_sw_tlb_put(CPUPPCState *env)
407 struct kvm_dirty_tlb dirty_tlb;
408 unsigned char *bitmap;
409 int ret;
411 if (!env->kvm_sw_tlb) {
412 return;
/* One bit per TLB entry; all bits set = everything dirty. */
415 bitmap = g_malloc((env->nb_tlb + 7) / 8);
416 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
418 dirty_tlb.bitmap = (uintptr_t)bitmap;
419 dirty_tlb.num_dirty = env->nb_tlb;
421 ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
422 if (ret) {
423 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
424 __func__, strerror(-ret));
427 g_free(bitmap);
/* Copy QEMU's CPU state into the kernel.  Only the general register
 * set (and, lazily, the SW TLB) is written; 'level' is unused here.
 * Returns 0 on success or a negative errno value. */
430 int kvm_arch_put_registers(CPUPPCState *env, int level)
432 struct kvm_regs regs;
433 int ret;
434 int i;
/* Read-modify-write: fetch first so fields QEMU doesn't track keep
 * their current kernel values. */
436 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
437 if (ret < 0)
438 return ret;
440 regs.ctr = env->ctr;
441 regs.lr = env->lr;
442 regs.xer = env->xer;
443 regs.msr = env->msr;
444 regs.pc = env->nip;
446 regs.srr0 = env->spr[SPR_SRR0];
447 regs.srr1 = env->spr[SPR_SRR1];
449 regs.sprg0 = env->spr[SPR_SPRG0];
450 regs.sprg1 = env->spr[SPR_SPRG1];
451 regs.sprg2 = env->spr[SPR_SPRG2];
452 regs.sprg3 = env->spr[SPR_SPRG3];
453 regs.sprg4 = env->spr[SPR_SPRG4];
454 regs.sprg5 = env->spr[SPR_SPRG5];
455 regs.sprg6 = env->spr[SPR_SPRG6];
456 regs.sprg7 = env->spr[SPR_SPRG7];
458 regs.pid = env->spr[SPR_BOOKE_PID];
460 for (i = 0;i < 32; i++)
461 regs.gpr[i] = env->gpr[i];
463 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
464 if (ret < 0)
465 return ret;
/* Flush the software TLB lazily, only when it actually changed. */
467 if (env->tlb_dirty) {
468 kvm_sw_tlb_put(env);
469 env->tlb_dirty = false;
472 return ret;
/* Copy the kernel's vcpu state back into QEMU: general registers
 * always, then the sregs block decoded per-capability (BookE feature
 * flags, or Book3S SDR1/SLB/SR/BAT state).  Returns 0 on success or a
 * negative errno value. */
475 int kvm_arch_get_registers(CPUPPCState *env)
477 struct kvm_regs regs;
478 struct kvm_sregs sregs;
479 uint32_t cr;
480 int i, ret;
482 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
483 if (ret < 0)
484 return ret;
/* Split the packed 32-bit CR into the eight 4-bit crf fields. */
486 cr = regs.cr;
487 for (i = 7; i >= 0; i--) {
488 env->crf[i] = cr & 15;
489 cr >>= 4;
492 env->ctr = regs.ctr;
493 env->lr = regs.lr;
494 env->xer = regs.xer;
495 env->msr = regs.msr;
496 env->nip = regs.pc;
498 env->spr[SPR_SRR0] = regs.srr0;
499 env->spr[SPR_SRR1] = regs.srr1;
501 env->spr[SPR_SPRG0] = regs.sprg0;
502 env->spr[SPR_SPRG1] = regs.sprg1;
503 env->spr[SPR_SPRG2] = regs.sprg2;
504 env->spr[SPR_SPRG3] = regs.sprg3;
505 env->spr[SPR_SPRG4] = regs.sprg4;
506 env->spr[SPR_SPRG5] = regs.sprg5;
507 env->spr[SPR_SPRG6] = regs.sprg6;
508 env->spr[SPR_SPRG7] = regs.sprg7;
510 env->spr[SPR_BOOKE_PID] = regs.pid;
512 for (i = 0;i < 32; i++)
513 env->gpr[i] = regs.gpr[i];
/* BookE: decode sregs groups guarded by their feature bits. */
515 if (cap_booke_sregs) {
516 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
517 if (ret < 0) {
518 return ret;
521 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
522 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
523 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
524 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
525 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
526 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
527 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
528 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
529 env->spr[SPR_DECR] = sregs.u.e.dec;
530 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
531 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
532 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
535 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
536 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
537 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
538 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
539 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
540 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
543 if (sregs.u.e.features & KVM_SREGS_E_64) {
544 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
547 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
548 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
551 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
552 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
553 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
554 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
555 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
556 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
557 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
558 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
559 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
560 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
561 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
562 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
563 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
564 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
565 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
566 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
567 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
569 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
570 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
571 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
572 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
575 if (sregs.u.e.features & KVM_SREGS_E_PM) {
576 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
579 if (sregs.u.e.features & KVM_SREGS_E_PC) {
580 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
581 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
585 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
586 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
587 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
588 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
/* mas7_3 packs MAS7 (high word) and MAS3 (low word) together. */
589 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
590 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
591 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
592 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
593 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
594 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
595 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
598 if (sregs.u.e.features & KVM_SREGS_EXP) {
599 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
602 if (sregs.u.e.features & KVM_SREGS_E_PD) {
603 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
604 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
607 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
608 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
609 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
610 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
612 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
613 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
614 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
/* Book3S (segmented) MMU state: SDR1, SLB, segment registers, BATs. */
619 if (cap_segstate) {
620 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
621 if (ret < 0) {
622 return ret;
625 ppc_store_sdr1(env, sregs.u.s.sdr1);
627 /* Sync SLB */
628 #ifdef TARGET_PPC64
629 for (i = 0; i < 64; i++) {
630 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
631 sregs.u.s.ppc64.slb[i].slbv);
633 #endif
635 /* Sync SRs */
636 for (i = 0; i < 16; i++) {
637 env->sr[i] = sregs.u.s.ppc32.sr[i];
640 /* Sync BATs */
/* Each 64-bit kernel BAT value splits into QEMU's lower/upper pair. */
641 for (i = 0; i < 8; i++) {
642 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
643 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
644 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
645 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
649 return 0;
652 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
654 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
656 if (irq != PPC_INTERRUPT_EXT) {
657 return 0;
660 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
661 return 0;
664 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
666 return 0;
/* External-interrupt input pin number for the default CPU core of
 * each target family (used by kvm_arch_pre_run() below). */
669 #if defined(TARGET_PPCEMB)
670 #define PPC_INPUT_INT PPC40x_INPUT_INT
671 #elif defined(TARGET_PPC64)
672 #define PPC_INPUT_INT PPC970_INPUT_INT
673 #else
674 #define PPC_INPUT_INT PPC6xx_INPUT_INT
675 #endif
/* Called before each KVM_RUN.  On kernels without level-irq support,
 * manually inject a pending external interrupt when the vcpu can take
 * one, and arm idle_timer as the reinjection safety net. */
677 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
679 int r;
680 unsigned irq;
682 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
683 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
684 if (!cap_interrupt_level &&
685 run->ready_for_interrupt_injection &&
686 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
687 (env->irq_input_state & (1<<PPC_INPUT_INT)))
689 /* For now KVM disregards the 'irq' argument. However, in the
690 * future KVM could cache it in-kernel to avoid a heavyweight exit
691 * when reading the UIC.
693 irq = KVM_INTERRUPT_SET;
695 dprintf("injected interrupt %d\n", irq);
696 r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
697 if (r < 0)
698 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
/* 20ms wakeup (tick rate / 50), matching the workaround described
 * above idle_timer. */
700 /* Always wake up soon in case the interrupt was level based */
701 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
702 (get_ticks_per_sec() / 50));
705 /* We don't know if there are more interrupts pending after this. However,
706 * the guest will return to userspace in the course of handling this one
707 * anyways, so we will get a chance to deliver the rest. */
/* Nothing to do after the vcpu returns from KVM_RUN on PPC. */
710 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
714 int kvm_arch_process_async_events(CPUPPCState *env)
/* A halted vcpu has nothing to run until an interrupt wakes it. */
716 return env->halted;
719 static int kvmppc_handle_halt(CPUPPCState *env)
721 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
722 env->halted = 1;
723 env->exception_index = EXCP_HLT;
726 return 0;
729 /* map dcr access to existing qemu dcr emulation */
730 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
732 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
733 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
735 return 0;
738 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
740 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
741 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
743 return 0;
/* Dispatch a KVM exit reason to its handler.  Returns the handler's
 * result, or -1 for unknown exit reasons (stops the vcpu loop). */
746 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
748 int ret;
750 switch (run->exit_reason) {
751 case KVM_EXIT_DCR:
752 if (run->dcr.is_write) {
753 dprintf("handle dcr write\n");
754 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
755 } else {
756 dprintf("handle dcr read\n");
757 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
759 break;
760 case KVM_EXIT_HLT:
761 dprintf("handle halt\n");
762 ret = kvmppc_handle_halt(env);
763 break;
764 #ifdef CONFIG_PSERIES
765 case KVM_EXIT_PAPR_HCALL:
766 dprintf("handle PAPR hypercall\n");
/* The hypercall result is passed back to the kernel in kvm_run. */
767 run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
768 run->papr_hcall.args);
769 ret = 0;
770 break;
771 #endif
772 default:
773 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
774 ret = -1;
775 break;
778 return ret;
/* Scan /proc/cpuinfo for a line starting with 'field' and copy the
 * whole line (field name included) into 'value'.  Returns 0 on a
 * match, -1 when the file can't be opened or the field is absent. */
781 static int read_cpuinfo(const char *field, char *value, int len)
783 FILE *f;
784 int ret = -1;
785 int field_len = strlen(field);
786 char line[512];
788 f = fopen("/proc/cpuinfo", "r");
789 if (!f) {
790 return -1;
793 do {
794 if(!fgets(line, sizeof(line), f)) {
795 break;
797 if (!strncmp(line, field, field_len)) {
798 pstrcpy(value, len, line);
799 ret = 0;
800 break;
/* Loop while the last line read was non-empty; fgets failure breaks
 * out above before 'line' is examined. */
802 } while(*line);
804 fclose(f);
806 return ret;
/* Return the host timebase frequency in Hz, parsed from the
 * "timebase" line of /proc/cpuinfo.  Falls back to the emulated tick
 * rate when the line is missing or malformed. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long val;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoul instead of atoi: atoi reports neither parse failure nor
     * overflow, so a malformed line used to yield a bogus timebase of
     * 0 instead of the fallback value. */
    errno = 0;
    val = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE) {
        return retval;
    }

    return (uint32_t)val;
}
829 /* Try to find a device tree node for a CPU with clock-frequency property */
/* On success 'buf' holds the node's path (no trailing slash) and 0 is
 * returned; on failure 'buf' is left empty and -1 is returned. */
830 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
832 struct dirent *dirp;
833 DIR *dp;
835 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
836 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
837 return -1;
840 buf[0] = '\0';
841 while ((dirp = readdir(dp)) != NULL) {
842 FILE *f;
/* Probe for the clock-frequency property by simply opening it. */
843 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
844 dirp->d_name);
845 f = fopen(buf, "r");
846 if (f) {
847 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
848 fclose(f);
849 break;
851 buf[0] = '\0';
853 closedir(dp);
854 if (buf[0] == '\0') {
855 printf("Unknown host!\n");
856 return -1;
859 return 0;
862 /* Read a CPU node property from the host device tree that's a single
863 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
864 * (can't find or open the property, or doesn't understand the
865 * format) */
866 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
868 char buf[PATH_MAX];
869 union {
870 uint32_t v32;
871 uint64_t v64;
872 } u;
873 FILE *f;
874 int len;
876 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
877 return -1;
880 strncat(buf, "/", sizeof(buf) - strlen(buf));
881 strncat(buf, propname, sizeof(buf) - strlen(buf));
883 f = fopen(buf, "rb");
884 if (!f) {
885 return -1;
888 len = fread(&u, 1, sizeof(u), f);
889 fclose(f);
890 switch (len) {
891 case 4:
892 /* property is a 32-bit quantity */
893 return be32_to_cpu(u.v32);
894 case 8:
895 return be64_to_cpu(u.v64);
898 return 0;
/* Host CPU clock frequency in Hz from the device tree; (uint64_t)-1
 * or 0 on failure per kvmppc_read_int_cpu_dt(). */
901 uint64_t kvmppc_get_clockfreq(void)
903 return kvmppc_read_int_cpu_dt("clock-frequency");
/* Host "ibm,vmx" device tree property; kvmppc_host_cpu_def() treats
 * >0 as Altivec and >1 as VSX, (uint32_t)-1 as unknown. */
906 uint32_t kvmppc_get_vmx(void)
908 return kvmppc_read_int_cpu_dt("ibm,vmx");
/* Host "ibm,dfp" device tree property (decimal floating point);
 * (uint32_t)-1 means unknown. */
911 uint32_t kvmppc_get_dfp(void)
913 return kvmppc_read_int_cpu_dt("ibm,dfp");
/* Fill 'buf' with the hypercall instruction sequence the guest should
 * use, preferring what the kernel reports via KVM_PPC_GET_PVINFO.
 * Always returns 0. */
916 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
918 uint32_t *hc = (uint32_t*)buf;
920 struct kvm_ppc_pvinfo pvinfo;
922 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
923 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
/* NOTE(review): assumes buf_len <= sizeof(pvinfo.hcall) -- confirm
 * with callers; a larger buf_len would over-read pvinfo. */
924 memcpy(buf, pvinfo.hcall, buf_len);
926 return 0;
930 * Fallback to always fail hypercalls:
932 * li r3, -1
933 * nop
934 * nop
935 * nop
938 hc[0] = 0x3860ffff;
939 hc[1] = 0x60000000;
940 hc[2] = 0x60000000;
941 hc[3] = 0x60000000;
943 return 0;
/* Switch this vcpu into PAPR (pSeries) mode, set HIOR, and point SDR1
 * at the hash table.  Aborts QEMU when the kernel lacks
 * KVM_CAP_PPC_PAPR or the sregs round trip fails. */
946 void kvmppc_set_papr(CPUPPCState *env)
948 struct kvm_enable_cap cap = {};
949 struct kvm_one_reg reg = {};
950 struct kvm_sregs sregs = {};
951 int ret;
952 uint64_t hior = env->spr[SPR_HIOR];
954 cap.cap = KVM_CAP_PPC_PAPR;
955 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
957 if (ret) {
958 goto fail;
962 * XXX We set HIOR here. It really should be a qdev property of
963 * the CPU node, but we don't have CPUs converted to qdev yet.
965 * Once we have qdev CPUs, move HIOR to a qdev property and
966 * remove this chunk.
968 reg.id = KVM_REG_PPC_HIOR;
969 reg.addr = (uintptr_t)&hior;
970 ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
/* A HIOR failure is deliberately non-fatal: PR KVM works without it. */
971 if (ret) {
972 fprintf(stderr, "Couldn't set HIOR. Maybe you're running an old \n"
973 "kernel with support for HV KVM but no PAPR PR \n"
974 "KVM in which case things will work. If they don't \n"
975 "please update your host kernel!\n");
978 /* Set SDR1 so kernel space finds the HTAB */
979 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
980 if (ret) {
981 goto fail;
984 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
986 ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
987 if (ret) {
988 goto fail;
991 return;
993 fail:
994 cpu_abort(env, "This KVM version does not support PAPR\n");
997 int kvmppc_smt_threads(void)
999 return cap_ppc_smt ? cap_ppc_smt : 1;
/* Allocate a contiguous Real Mode Area from the kernel and map it at
 * guest physical address 0.  Returns the RMA size (capped at 256MB),
 * 0 when contiguous allocation isn't required, or -1 on error. */
1002 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1004 void *rma;
1005 off_t size;
1006 int fd;
1007 struct kvm_allocate_rma ret;
1008 MemoryRegion *rma_region;
1010 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1011 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1012 * not necessary on this hardware
1013 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1015 * FIXME: We should allow the user to force contiguous RMA
1016 * allocation in the cap_ppc_rma==1 case.
1018 if (cap_ppc_rma < 2) {
1019 return 0;
/* The ioctl returns an fd for mmap'ing the allocated RMA. */
1022 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1023 if (fd < 0) {
1024 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1025 strerror(errno));
1026 return -1;
1029 size = MIN(ret.rma_size, 256ul << 20);
1031 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1032 if (rma == MAP_FAILED) {
1033 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1034 return -1;
1037 rma_region = g_new(MemoryRegion, 1);
1038 memory_region_init_ram_ptr(rma_region, name, size, rma);
1039 vmstate_register_ram_global(rma_region);
1040 memory_region_add_subregion(sysmem, 0, rma_region);
1042 return size;
/* Create an in-kernel TCE (DMA translation) table and mmap it into
 * QEMU.  Returns the mapped table with its fd stored in *pfd, or NULL
 * (with *pfd == -1) when unsupported or on failure. */
1045 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1047 struct kvm_create_spapr_tce args = {
1048 .liobn = liobn,
1049 .window_size = window_size,
1051 long len;
1052 int fd;
1053 void *table;
1055 /* Must set fd to -1 so we don't try to munmap when called for
1056 * destroying the table, which the upper layers -will- do
1058 *pfd = -1;
1059 if (!cap_spapr_tce) {
1060 return NULL;
1063 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1064 if (fd < 0) {
1065 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1066 liobn);
1067 return NULL;
/* One sPAPRTCE entry per IOMMU page in the DMA window. */
1070 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1071 /* FIXME: round this up to page size */
1073 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1074 if (table == MAP_FAILED) {
1075 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1076 liobn);
1077 close(fd);
1078 return NULL;
1081 *pfd = fd;
1082 return table;
/* Unmap and close a TCE table created by kvmppc_create_spapr_tce().
 * Returns 0 on success, -1 when fd is invalid (table was never
 * kernel-backed). */
1085 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1087 long len;
1089 if (fd < 0) {
1090 return -1;
/* Must match the length computed in kvmppc_create_spapr_tce(). */
1093 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1094 if ((munmap(table, len) < 0) ||
1095 (close(fd) < 0)) {
1096 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1097 strerror(errno));
1098 /* Leak the table */
1101 return 0;
/* Read the host Processor Version Register via the mfpvr instruction
 * (requires building on a PPC host). */
1104 static inline uint32_t mfpvr(void)
1106 uint32_t pvr;
1108 asm ("mfpvr %0"
1109 : "=r"(pvr));
1110 return pvr;
/* Set (on) or clear (!on) the given flag bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t val = *word;

    if (on) {
        val |= flags;
    } else {
        val &= ~flags;
    }

    *word = val;
}
/* Build a CPU spec describing the host: start from the compiled-in
 * spec matching the host PVR, then override Altivec/VSX/DFP flags
 * with what the host device tree reports.  Caller owns the returned
 * (g_malloc'd) spec. */
1122 const ppc_def_t *kvmppc_host_cpu_def(void)
1124 uint32_t host_pvr = mfpvr();
1125 const ppc_def_t *base_spec;
1126 ppc_def_t *spec;
1127 uint32_t vmx = kvmppc_get_vmx();
1128 uint32_t dfp = kvmppc_get_dfp();
1130 base_spec = ppc_find_by_pvr(host_pvr);
1132 spec = g_malloc0(sizeof(*spec));
1133 memcpy(spec, base_spec, sizeof(*spec));
1135 /* Now fix up the spec with information we can query from the host */
/* (uint32_t)-1 is kvmppc_read_int_cpu_dt()'s "unknown" value. */
1137 if (vmx != -1) {
1138 /* Only override when we know what the host supports */
1139 alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1140 alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1142 if (dfp != -1) {
1143 /* Only override when we know what the host supports */
1144 alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1147 return spec;
/* Renumber cpu_index so vcpu ids land on the host's SMT thread
 * layout (host threads-per-core may differ from smp_threads). */
1150 int kvmppc_fixup_cpu(CPUPPCState *env)
1152 int smt;
1154 /* Adjust cpu index for SMT */
1155 smt = kvmppc_smt_threads();
1156 env->cpu_index = (env->cpu_index / smp_threads) * smt
1157 + (env->cpu_index % smp_threads);
1159 return 0;
/* Stop the vcpu and report rather than retrying when in-kernel
 * instruction emulation fails. */
1163 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1165 return true;
/* SIGBUS (hardware memory error) handling in vcpu context is not
 * implemented on PPC; 1 = unhandled. */
1168 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1170 return 1;
/* As above, for SIGBUS delivered outside vcpu context. */
1173 int kvm_arch_on_sigbus(int code, void *addr)
1175 return 1;