nbd: Remove unused functions
[qemu/ar7.git] / target-ppc / kvm.c
blobe663ff0acb5899789d8b689a747df5f9d2ef9f1e
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
/* Uncomment the next line to get KVM debug traces on stderr. */
//#define DEBUG_KVM

/* dprintf() prints to stderr when DEBUG_KVM is defined and expands to an
 * empty statement otherwise. */
#ifndef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { } while (0)
#else
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64 static int cap_one_reg;
66 /* XXX We have a race condition where we actually have a level triggered
67 * interrupt, but the infrastructure can't expose that yet, so the guest
68 * takes but ignores it, goes to sleep and never gets notified that there's
69 * still an interrupt pending.
71 * As a quick workaround, let's just wake up again 20 ms after we injected
72 * an interrupt. That way we can assure that we're always reinjecting
73 * interrupts in case the guest swallowed them.
75 static QEMUTimer *idle_timer;
77 static void kvm_kick_cpu(void *opaque)
79 PowerPCCPU *cpu = opaque;
81 qemu_cpu_kick(CPU(cpu));
84 static int kvm_ppc_register_host_cpu_type(void);
86 int kvm_arch_init(KVMState *s)
88 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
89 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
90 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
91 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
92 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
93 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
94 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
95 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
96 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
98 if (!cap_interrupt_level) {
99 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
100 "VM to stall at times!\n");
103 kvm_ppc_register_host_cpu_type();
105 return 0;
108 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
110 CPUPPCState *cenv = &cpu->env;
111 CPUState *cs = CPU(cpu);
112 struct kvm_sregs sregs;
113 int ret;
115 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
116 /* What we're really trying to say is "if we're on BookE, we use
117 the native PVR for now". This is the only sane way to check
118 it though, so we potentially confuse users that they can run
119 BookE guests on BookS. Let's hope nobody dares enough :) */
120 return 0;
121 } else {
122 if (!cap_segstate) {
123 fprintf(stderr, "kvm error: missing PVR setting capability\n");
124 return -ENOSYS;
128 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
129 if (ret) {
130 return ret;
133 sregs.pvr = cenv->spr[SPR_PVR];
134 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
137 /* Set up a shared TLB array with KVM */
138 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
140 CPUPPCState *env = &cpu->env;
141 CPUState *cs = CPU(cpu);
142 struct kvm_book3e_206_tlb_params params = {};
143 struct kvm_config_tlb cfg = {};
144 struct kvm_enable_cap encap = {};
145 unsigned int entries = 0;
146 int ret, i;
148 if (!kvm_enabled() ||
149 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
150 return 0;
153 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
155 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
156 params.tlb_sizes[i] = booke206_tlb_size(env, i);
157 params.tlb_ways[i] = booke206_tlb_ways(env, i);
158 entries += params.tlb_sizes[i];
161 assert(entries == env->nb_tlb);
162 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
164 env->tlb_dirty = true;
166 cfg.array = (uintptr_t)env->tlb.tlbm;
167 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
168 cfg.params = (uintptr_t)&params;
169 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
171 encap.cap = KVM_CAP_SW_TLB;
172 encap.args[0] = (uintptr_t)&cfg;
174 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
175 if (ret < 0) {
176 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
177 __func__, strerror(-ret));
178 return ret;
181 env->kvm_sw_tlb = true;
182 return 0;
186 #if defined(TARGET_PPC64)
187 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
188 struct kvm_ppc_smmu_info *info)
190 CPUPPCState *env = &cpu->env;
191 CPUState *cs = CPU(cpu);
193 memset(info, 0, sizeof(*info));
195 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
196 * need to "guess" what the supported page sizes are.
198 * For that to work we make a few assumptions:
200 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
201 * KVM which only supports 4K and 16M pages, but supports them
202 * regardless of the backing store characteritics. We also don't
203 * support 1T segments.
205 * This is safe as if HV KVM ever supports that capability or PR
206 * KVM grows supports for more page/segment sizes, those versions
207 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
208 * will not hit this fallback
210 * - Else we are running HV KVM. This means we only support page
211 * sizes that fit in the backing store. Additionally we only
212 * advertize 64K pages if the processor is ARCH 2.06 and we assume
213 * P7 encodings for the SLB and hash table. Here too, we assume
214 * support for any newer processor will mean a kernel that
215 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
216 * this fallback.
218 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
219 /* No flags */
220 info->flags = 0;
221 info->slb_size = 64;
223 /* Standard 4k base page size segment */
224 info->sps[0].page_shift = 12;
225 info->sps[0].slb_enc = 0;
226 info->sps[0].enc[0].page_shift = 12;
227 info->sps[0].enc[0].pte_enc = 0;
229 /* Standard 16M large page size segment */
230 info->sps[1].page_shift = 24;
231 info->sps[1].slb_enc = SLB_VSID_L;
232 info->sps[1].enc[0].page_shift = 24;
233 info->sps[1].enc[0].pte_enc = 0;
234 } else {
235 int i = 0;
237 /* HV KVM has backing store size restrictions */
238 info->flags = KVM_PPC_PAGE_SIZES_REAL;
240 if (env->mmu_model & POWERPC_MMU_1TSEG) {
241 info->flags |= KVM_PPC_1T_SEGMENTS;
244 if (env->mmu_model == POWERPC_MMU_2_06) {
245 info->slb_size = 32;
246 } else {
247 info->slb_size = 64;
250 /* Standard 4k base page size segment */
251 info->sps[i].page_shift = 12;
252 info->sps[i].slb_enc = 0;
253 info->sps[i].enc[0].page_shift = 12;
254 info->sps[i].enc[0].pte_enc = 0;
255 i++;
257 /* 64K on MMU 2.06 */
258 if (env->mmu_model == POWERPC_MMU_2_06) {
259 info->sps[i].page_shift = 16;
260 info->sps[i].slb_enc = 0x110;
261 info->sps[i].enc[0].page_shift = 16;
262 info->sps[i].enc[0].pte_enc = 1;
263 i++;
266 /* Standard 16M large page size segment */
267 info->sps[i].page_shift = 24;
268 info->sps[i].slb_enc = SLB_VSID_L;
269 info->sps[i].enc[0].page_shift = 24;
270 info->sps[i].enc[0].pte_enc = 0;
274 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
276 CPUState *cs = CPU(cpu);
277 int ret;
279 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
280 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
281 if (ret == 0) {
282 return;
286 kvm_get_fallback_smmu_info(cpu, info);
289 static long getrampagesize(void)
291 struct statfs fs;
292 int ret;
294 if (!mem_path) {
295 /* guest RAM is backed by normal anonymous pages */
296 return getpagesize();
299 do {
300 ret = statfs(mem_path, &fs);
301 } while (ret != 0 && errno == EINTR);
303 if (ret != 0) {
304 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
305 strerror(errno));
306 exit(1);
309 #define HUGETLBFS_MAGIC 0x958458f6
311 if (fs.f_type != HUGETLBFS_MAGIC) {
312 /* Explicit mempath, but it's ordinary pages */
313 return getpagesize();
316 /* It's hugepage, return the huge page size */
317 return fs.f_bsize;
320 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
322 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
323 return true;
326 return (1ul << shift) <= rampgsize;
329 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
331 static struct kvm_ppc_smmu_info smmu_info;
332 static bool has_smmu_info;
333 CPUPPCState *env = &cpu->env;
334 long rampagesize;
335 int iq, ik, jq, jk;
337 /* We only handle page sizes for 64-bit server guests for now */
338 if (!(env->mmu_model & POWERPC_MMU_64)) {
339 return;
342 /* Collect MMU info from kernel if not already */
343 if (!has_smmu_info) {
344 kvm_get_smmu_info(cpu, &smmu_info);
345 has_smmu_info = true;
348 rampagesize = getrampagesize();
350 /* Convert to QEMU form */
351 memset(&env->sps, 0, sizeof(env->sps));
353 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
354 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
355 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
357 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
358 ksps->page_shift)) {
359 continue;
361 qsps->page_shift = ksps->page_shift;
362 qsps->slb_enc = ksps->slb_enc;
363 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
364 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
365 ksps->enc[jk].page_shift)) {
366 continue;
368 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
369 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
370 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
371 break;
374 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
375 break;
378 env->slb_nr = smmu_info.slb_size;
379 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
380 env->mmu_model |= POWERPC_MMU_1TSEG;
381 } else {
382 env->mmu_model &= ~POWERPC_MMU_1TSEG;
385 #else /* defined (TARGET_PPC64) */
387 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
391 #endif /* !defined (TARGET_PPC64) */
393 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
395 return cpu->cpu_index;
398 int kvm_arch_init_vcpu(CPUState *cs)
400 PowerPCCPU *cpu = POWERPC_CPU(cs);
401 CPUPPCState *cenv = &cpu->env;
402 int ret;
404 /* Gather server mmu info from KVM and update the CPU state */
405 kvm_fixup_page_sizes(cpu);
407 /* Synchronize sregs with kvm */
408 ret = kvm_arch_sync_sregs(cpu);
409 if (ret) {
410 return ret;
413 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
415 /* Some targets support access to KVM's guest TLB. */
416 switch (cenv->mmu_model) {
417 case POWERPC_MMU_BOOKE206:
418 ret = kvm_booke206_tlb_init(cpu);
419 break;
420 default:
421 break;
424 return ret;
427 void kvm_arch_reset_vcpu(CPUState *cpu)
431 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
433 CPUPPCState *env = &cpu->env;
434 CPUState *cs = CPU(cpu);
435 struct kvm_dirty_tlb dirty_tlb;
436 unsigned char *bitmap;
437 int ret;
439 if (!env->kvm_sw_tlb) {
440 return;
443 bitmap = g_malloc((env->nb_tlb + 7) / 8);
444 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
446 dirty_tlb.bitmap = (uintptr_t)bitmap;
447 dirty_tlb.num_dirty = env->nb_tlb;
449 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
450 if (ret) {
451 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
452 __func__, strerror(-ret));
455 g_free(bitmap);
458 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
460 PowerPCCPU *cpu = POWERPC_CPU(cs);
461 CPUPPCState *env = &cpu->env;
462 union {
463 uint32_t u32;
464 uint64_t u64;
465 } val;
466 struct kvm_one_reg reg = {
467 .id = id,
468 .addr = (uintptr_t) &val,
470 int ret;
472 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
473 if (ret != 0) {
474 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
475 spr, strerror(errno));
476 } else {
477 switch (id & KVM_REG_SIZE_MASK) {
478 case KVM_REG_SIZE_U32:
479 env->spr[spr] = val.u32;
480 break;
482 case KVM_REG_SIZE_U64:
483 env->spr[spr] = val.u64;
484 break;
486 default:
487 /* Don't handle this size yet */
488 abort();
493 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
495 PowerPCCPU *cpu = POWERPC_CPU(cs);
496 CPUPPCState *env = &cpu->env;
497 union {
498 uint32_t u32;
499 uint64_t u64;
500 } val;
501 struct kvm_one_reg reg = {
502 .id = id,
503 .addr = (uintptr_t) &val,
505 int ret;
507 switch (id & KVM_REG_SIZE_MASK) {
508 case KVM_REG_SIZE_U32:
509 val.u32 = env->spr[spr];
510 break;
512 case KVM_REG_SIZE_U64:
513 val.u64 = env->spr[spr];
514 break;
516 default:
517 /* Don't handle this size yet */
518 abort();
521 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
522 if (ret != 0) {
523 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
524 spr, strerror(errno));
528 static int kvm_put_fp(CPUState *cs)
530 PowerPCCPU *cpu = POWERPC_CPU(cs);
531 CPUPPCState *env = &cpu->env;
532 struct kvm_one_reg reg;
533 int i;
534 int ret;
536 if (env->insns_flags & PPC_FLOAT) {
537 uint64_t fpscr = env->fpscr;
538 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
540 reg.id = KVM_REG_PPC_FPSCR;
541 reg.addr = (uintptr_t)&fpscr;
542 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
543 if (ret < 0) {
544 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
545 return ret;
548 for (i = 0; i < 32; i++) {
549 uint64_t vsr[2];
551 vsr[0] = float64_val(env->fpr[i]);
552 vsr[1] = env->vsr[i];
553 reg.addr = (uintptr_t) &vsr;
554 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
556 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
557 if (ret < 0) {
558 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
559 i, strerror(errno));
560 return ret;
565 if (env->insns_flags & PPC_ALTIVEC) {
566 reg.id = KVM_REG_PPC_VSCR;
567 reg.addr = (uintptr_t)&env->vscr;
568 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
569 if (ret < 0) {
570 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
571 return ret;
574 for (i = 0; i < 32; i++) {
575 reg.id = KVM_REG_PPC_VR(i);
576 reg.addr = (uintptr_t)&env->avr[i];
577 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 if (ret < 0) {
579 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
580 return ret;
585 return 0;
588 static int kvm_get_fp(CPUState *cs)
590 PowerPCCPU *cpu = POWERPC_CPU(cs);
591 CPUPPCState *env = &cpu->env;
592 struct kvm_one_reg reg;
593 int i;
594 int ret;
596 if (env->insns_flags & PPC_FLOAT) {
597 uint64_t fpscr;
598 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
600 reg.id = KVM_REG_PPC_FPSCR;
601 reg.addr = (uintptr_t)&fpscr;
602 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
603 if (ret < 0) {
604 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
605 return ret;
606 } else {
607 env->fpscr = fpscr;
610 for (i = 0; i < 32; i++) {
611 uint64_t vsr[2];
613 reg.addr = (uintptr_t) &vsr;
614 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
616 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
617 if (ret < 0) {
618 dprintf("Unable to get %s%d from KVM: %s\n",
619 vsx ? "VSR" : "FPR", i, strerror(errno));
620 return ret;
621 } else {
622 env->fpr[i] = vsr[0];
623 if (vsx) {
624 env->vsr[i] = vsr[1];
630 if (env->insns_flags & PPC_ALTIVEC) {
631 reg.id = KVM_REG_PPC_VSCR;
632 reg.addr = (uintptr_t)&env->vscr;
633 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
634 if (ret < 0) {
635 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
636 return ret;
639 for (i = 0; i < 32; i++) {
640 reg.id = KVM_REG_PPC_VR(i);
641 reg.addr = (uintptr_t)&env->avr[i];
642 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 if (ret < 0) {
644 dprintf("Unable to get VR%d from KVM: %s\n",
645 i, strerror(errno));
646 return ret;
651 return 0;
654 int kvm_arch_put_registers(CPUState *cs, int level)
656 PowerPCCPU *cpu = POWERPC_CPU(cs);
657 CPUPPCState *env = &cpu->env;
658 struct kvm_regs regs;
659 int ret;
660 int i;
662 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
663 if (ret < 0) {
664 return ret;
667 regs.ctr = env->ctr;
668 regs.lr = env->lr;
669 regs.xer = cpu_read_xer(env);
670 regs.msr = env->msr;
671 regs.pc = env->nip;
673 regs.srr0 = env->spr[SPR_SRR0];
674 regs.srr1 = env->spr[SPR_SRR1];
676 regs.sprg0 = env->spr[SPR_SPRG0];
677 regs.sprg1 = env->spr[SPR_SPRG1];
678 regs.sprg2 = env->spr[SPR_SPRG2];
679 regs.sprg3 = env->spr[SPR_SPRG3];
680 regs.sprg4 = env->spr[SPR_SPRG4];
681 regs.sprg5 = env->spr[SPR_SPRG5];
682 regs.sprg6 = env->spr[SPR_SPRG6];
683 regs.sprg7 = env->spr[SPR_SPRG7];
685 regs.pid = env->spr[SPR_BOOKE_PID];
687 for (i = 0;i < 32; i++)
688 regs.gpr[i] = env->gpr[i];
690 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
691 if (ret < 0)
692 return ret;
694 kvm_put_fp(cs);
696 if (env->tlb_dirty) {
697 kvm_sw_tlb_put(cpu);
698 env->tlb_dirty = false;
701 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
702 struct kvm_sregs sregs;
704 sregs.pvr = env->spr[SPR_PVR];
706 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
708 /* Sync SLB */
709 #ifdef TARGET_PPC64
710 for (i = 0; i < 64; i++) {
711 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
712 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
714 #endif
716 /* Sync SRs */
717 for (i = 0; i < 16; i++) {
718 sregs.u.s.ppc32.sr[i] = env->sr[i];
721 /* Sync BATs */
722 for (i = 0; i < 8; i++) {
723 /* Beware. We have to swap upper and lower bits here */
724 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
725 | env->DBAT[1][i];
726 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
727 | env->IBAT[1][i];
730 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
731 if (ret) {
732 return ret;
736 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
737 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
740 if (cap_one_reg) {
741 int i;
743 /* We deliberately ignore errors here, for kernels which have
744 * the ONE_REG calls, but don't support the specific
745 * registers, there's a reasonable chance things will still
746 * work, at least until we try to migrate. */
747 for (i = 0; i < 1024; i++) {
748 uint64_t id = env->spr_cb[i].one_reg_id;
750 if (id != 0) {
751 kvm_put_one_spr(cs, id, i);
756 return ret;
759 int kvm_arch_get_registers(CPUState *cs)
761 PowerPCCPU *cpu = POWERPC_CPU(cs);
762 CPUPPCState *env = &cpu->env;
763 struct kvm_regs regs;
764 struct kvm_sregs sregs;
765 uint32_t cr;
766 int i, ret;
768 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
769 if (ret < 0)
770 return ret;
772 cr = regs.cr;
773 for (i = 7; i >= 0; i--) {
774 env->crf[i] = cr & 15;
775 cr >>= 4;
778 env->ctr = regs.ctr;
779 env->lr = regs.lr;
780 cpu_write_xer(env, regs.xer);
781 env->msr = regs.msr;
782 env->nip = regs.pc;
784 env->spr[SPR_SRR0] = regs.srr0;
785 env->spr[SPR_SRR1] = regs.srr1;
787 env->spr[SPR_SPRG0] = regs.sprg0;
788 env->spr[SPR_SPRG1] = regs.sprg1;
789 env->spr[SPR_SPRG2] = regs.sprg2;
790 env->spr[SPR_SPRG3] = regs.sprg3;
791 env->spr[SPR_SPRG4] = regs.sprg4;
792 env->spr[SPR_SPRG5] = regs.sprg5;
793 env->spr[SPR_SPRG6] = regs.sprg6;
794 env->spr[SPR_SPRG7] = regs.sprg7;
796 env->spr[SPR_BOOKE_PID] = regs.pid;
798 for (i = 0;i < 32; i++)
799 env->gpr[i] = regs.gpr[i];
801 kvm_get_fp(cs);
803 if (cap_booke_sregs) {
804 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
805 if (ret < 0) {
806 return ret;
809 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
810 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
811 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
812 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
813 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
814 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
815 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
816 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
817 env->spr[SPR_DECR] = sregs.u.e.dec;
818 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
819 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
820 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
823 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
824 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
825 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
826 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
827 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
828 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
831 if (sregs.u.e.features & KVM_SREGS_E_64) {
832 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
835 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
836 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
839 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
840 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
841 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
842 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
843 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
844 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
845 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
846 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
847 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
848 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
849 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
850 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
851 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
852 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
853 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
854 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
855 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
857 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
858 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
859 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
860 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
863 if (sregs.u.e.features & KVM_SREGS_E_PM) {
864 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
867 if (sregs.u.e.features & KVM_SREGS_E_PC) {
868 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
869 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
873 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
874 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
875 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
876 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
877 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
878 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
879 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
880 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
881 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
882 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
883 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
886 if (sregs.u.e.features & KVM_SREGS_EXP) {
887 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
890 if (sregs.u.e.features & KVM_SREGS_E_PD) {
891 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
892 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
895 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
896 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
897 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
898 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
900 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
901 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
902 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
907 if (cap_segstate) {
908 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
909 if (ret < 0) {
910 return ret;
913 ppc_store_sdr1(env, sregs.u.s.sdr1);
915 /* Sync SLB */
916 #ifdef TARGET_PPC64
917 for (i = 0; i < 64; i++) {
918 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
919 sregs.u.s.ppc64.slb[i].slbv);
921 #endif
923 /* Sync SRs */
924 for (i = 0; i < 16; i++) {
925 env->sr[i] = sregs.u.s.ppc32.sr[i];
928 /* Sync BATs */
929 for (i = 0; i < 8; i++) {
930 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
931 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
932 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
933 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
937 if (cap_hior) {
938 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
941 if (cap_one_reg) {
942 int i;
944 /* We deliberately ignore errors here, for kernels which have
945 * the ONE_REG calls, but don't support the specific
946 * registers, there's a reasonable chance things will still
947 * work, at least until we try to migrate. */
948 for (i = 0; i < 1024; i++) {
949 uint64_t id = env->spr_cb[i].one_reg_id;
951 if (id != 0) {
952 kvm_get_one_spr(cs, id, i);
957 return 0;
960 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
962 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
964 if (irq != PPC_INTERRUPT_EXT) {
965 return 0;
968 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
969 return 0;
972 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
974 return 0;
977 #if defined(TARGET_PPCEMB)
978 #define PPC_INPUT_INT PPC40x_INPUT_INT
979 #elif defined(TARGET_PPC64)
980 #define PPC_INPUT_INT PPC970_INPUT_INT
981 #else
982 #define PPC_INPUT_INT PPC6xx_INPUT_INT
983 #endif
985 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
987 PowerPCCPU *cpu = POWERPC_CPU(cs);
988 CPUPPCState *env = &cpu->env;
989 int r;
990 unsigned irq;
992 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
993 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
994 if (!cap_interrupt_level &&
995 run->ready_for_interrupt_injection &&
996 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
997 (env->irq_input_state & (1<<PPC_INPUT_INT)))
999 /* For now KVM disregards the 'irq' argument. However, in the
1000 * future KVM could cache it in-kernel to avoid a heavyweight exit
1001 * when reading the UIC.
1003 irq = KVM_INTERRUPT_SET;
1005 dprintf("injected interrupt %d\n", irq);
1006 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1007 if (r < 0) {
1008 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1011 /* Always wake up soon in case the interrupt was level based */
1012 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1013 (get_ticks_per_sec() / 50));
1016 /* We don't know if there are more interrupts pending after this. However,
1017 * the guest will return to userspace in the course of handling this one
1018 * anyways, so we will get a chance to deliver the rest. */
1021 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1025 int kvm_arch_process_async_events(CPUState *cs)
1027 return cs->halted;
1030 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1032 CPUState *cs = CPU(cpu);
1033 CPUPPCState *env = &cpu->env;
1035 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1036 cs->halted = 1;
1037 env->exception_index = EXCP_HLT;
1040 return 0;
1043 /* map dcr access to existing qemu dcr emulation */
1044 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1046 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1047 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1049 return 0;
1052 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1054 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1055 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1057 return 0;
1060 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1062 PowerPCCPU *cpu = POWERPC_CPU(cs);
1063 CPUPPCState *env = &cpu->env;
1064 int ret;
1066 switch (run->exit_reason) {
1067 case KVM_EXIT_DCR:
1068 if (run->dcr.is_write) {
1069 dprintf("handle dcr write\n");
1070 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1071 } else {
1072 dprintf("handle dcr read\n");
1073 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1075 break;
1076 case KVM_EXIT_HLT:
1077 dprintf("handle halt\n");
1078 ret = kvmppc_handle_halt(cpu);
1079 break;
1080 #ifdef CONFIG_PSERIES
1081 case KVM_EXIT_PAPR_HCALL:
1082 dprintf("handle PAPR hypercall\n");
1083 run->papr_hcall.ret = spapr_hypercall(cpu,
1084 run->papr_hcall.nr,
1085 run->papr_hcall.args);
1086 ret = 0;
1087 break;
1088 #endif
1089 case KVM_EXIT_EPR:
1090 dprintf("handle epr\n");
1091 run->epr.epr = ldl_phys(env->mpic_iack);
1092 ret = 0;
1093 break;
1094 default:
1095 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1096 ret = -1;
1097 break;
1100 return ret;
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy the
 * whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when such a line was found, -1 otherwise (including when the
 * file cannot be opened).
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int found = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), cpuinfo)) {
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* An empty string ends the scan as well. */
        if (*line == '\0') {
            break;
        }
    }

    fclose(cpuinfo);

    return found;
}
/*
 * Return the number parsed (with atoi()) after the ':' of the "timebase"
 * line of /proc/cpuinfo.  Falls back to get_ticks_per_sec() when the line is
 * missing or has no ':'.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line)) != 0) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (colon == NULL) {
        return fallback;
    }

    /* The value starts right after the separator. */
    return atoi(colon + 1);
}
1151 /* Try to find a device tree node for a CPU with clock-frequency property */
1152 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1154 struct dirent *dirp;
1155 DIR *dp;
1157 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1158 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1159 return -1;
1162 buf[0] = '\0';
1163 while ((dirp = readdir(dp)) != NULL) {
1164 FILE *f;
1165 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1166 dirp->d_name);
1167 f = fopen(buf, "r");
1168 if (f) {
1169 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1170 fclose(f);
1171 break;
1173 buf[0] = '\0';
1175 closedir(dp);
1176 if (buf[0] == '\0') {
1177 printf("Unknown host!\n");
1178 return -1;
1181 return 0;
1184 /* Read a CPU node property from the host device tree that's a single
1185 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1186 * (can't find or open the property, or doesn't understand the
1187 * format) */
1188 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1190 char buf[PATH_MAX];
1191 union {
1192 uint32_t v32;
1193 uint64_t v64;
1194 } u;
1195 FILE *f;
1196 int len;
1198 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1199 return -1;
1202 strncat(buf, "/", sizeof(buf) - strlen(buf));
1203 strncat(buf, propname, sizeof(buf) - strlen(buf));
1205 f = fopen(buf, "rb");
1206 if (!f) {
1207 return -1;
1210 len = fread(&u, 1, sizeof(u), f);
1211 fclose(f);
1212 switch (len) {
1213 case 4:
1214 /* property is a 32-bit quantity */
1215 return be32_to_cpu(u.v32);
1216 case 8:
1217 return be64_to_cpu(u.v64);
1220 return 0;
/* Value of the host CPU node's "clock-frequency" device tree property, as
 * returned by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Value of the host CPU node's "ibm,vmx" device tree property, as returned
 * by kvmppc_read_int_cpu_dt() and converted to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
/* Value of the host CPU node's "ibm,dfp" device tree property, as returned
 * by kvmppc_read_int_cpu_dt() and converted to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
1238 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1240 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1241 CPUState *cs = CPU(cpu);
1243 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1244 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1245 return 0;
1248 return 1;
1251 int kvmppc_get_hasidle(CPUPPCState *env)
1253 struct kvm_ppc_pvinfo pvinfo;
1255 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1256 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1257 return 1;
1260 return 0;
1263 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1265 uint32_t *hc = (uint32_t*)buf;
1266 struct kvm_ppc_pvinfo pvinfo;
1268 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1269 memcpy(buf, pvinfo.hcall, buf_len);
1270 return 0;
1274 * Fallback to always fail hypercalls:
1276 * li r3, -1
1277 * nop
1278 * nop
1279 * nop
1282 hc[0] = 0x3860ffff;
1283 hc[1] = 0x60000000;
1284 hc[2] = 0x60000000;
1285 hc[3] = 0x60000000;
1287 return 0;
1290 void kvmppc_set_papr(PowerPCCPU *cpu)
1292 CPUPPCState *env = &cpu->env;
1293 CPUState *cs = CPU(cpu);
1294 struct kvm_enable_cap cap = {};
1295 int ret;
1297 cap.cap = KVM_CAP_PPC_PAPR;
1298 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1300 if (ret) {
1301 cpu_abort(env, "This KVM version does not support PAPR\n");
1305 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1307 CPUPPCState *env = &cpu->env;
1308 CPUState *cs = CPU(cpu);
1309 struct kvm_enable_cap cap = {};
1310 int ret;
1312 cap.cap = KVM_CAP_PPC_EPR;
1313 cap.args[0] = mpic_proxy;
1314 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1316 if (ret && mpic_proxy) {
1317 cpu_abort(env, "This KVM version does not support EPR\n");
1321 int kvmppc_smt_threads(void)
1323 return cap_ppc_smt ? cap_ppc_smt : 1;
1326 #ifdef TARGET_PPC64
1327 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1329 void *rma;
1330 off_t size;
1331 int fd;
1332 struct kvm_allocate_rma ret;
1333 MemoryRegion *rma_region;
1335 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1336 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1337 * not necessary on this hardware
1338 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1340 * FIXME: We should allow the user to force contiguous RMA
1341 * allocation in the cap_ppc_rma==1 case.
1343 if (cap_ppc_rma < 2) {
1344 return 0;
1347 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1348 if (fd < 0) {
1349 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1350 strerror(errno));
1351 return -1;
1354 size = MIN(ret.rma_size, 256ul << 20);
1356 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1357 if (rma == MAP_FAILED) {
1358 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1359 return -1;
1362 rma_region = g_new(MemoryRegion, 1);
1363 memory_region_init_ram_ptr(rma_region, name, size, rma);
1364 vmstate_register_ram_global(rma_region);
1365 memory_region_add_subregion(sysmem, 0, rma_region);
1367 return size;
1370 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1372 if (cap_ppc_rma >= 2) {
1373 return current_size;
1375 return MIN(current_size,
1376 getrampagesize() << (hash_shift - 7));
1378 #endif
1380 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1382 struct kvm_create_spapr_tce args = {
1383 .liobn = liobn,
1384 .window_size = window_size,
1386 long len;
1387 int fd;
1388 void *table;
1390 /* Must set fd to -1 so we don't try to munmap when called for
1391 * destroying the table, which the upper layers -will- do
1393 *pfd = -1;
1394 if (!cap_spapr_tce) {
1395 return NULL;
1398 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1399 if (fd < 0) {
1400 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1401 liobn);
1402 return NULL;
1405 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1406 /* FIXME: round this up to page size */
1408 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1409 if (table == MAP_FAILED) {
1410 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1411 liobn);
1412 close(fd);
1413 return NULL;
1416 *pfd = fd;
1417 return table;
1420 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1422 long len;
1424 if (fd < 0) {
1425 return -1;
1428 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1429 if ((munmap(table, len) < 0) ||
1430 (close(fd) < 0)) {
1431 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1432 strerror(errno));
1433 /* Leak the table */
1436 return 0;
1439 int kvmppc_reset_htab(int shift_hint)
1441 uint32_t shift = shift_hint;
1443 if (!kvm_enabled()) {
1444 /* Full emulation, tell caller to allocate htab itself */
1445 return 0;
1447 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1448 int ret;
1449 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1450 if (ret == -ENOTTY) {
1451 /* At least some versions of PR KVM advertise the
1452 * capability, but don't implement the ioctl(). Oops.
1453 * Return 0 so that we allocate the htab in qemu, as is
1454 * correct for PR. */
1455 return 0;
1456 } else if (ret < 0) {
1457 return ret;
1459 return shift;
1462 /* We have a kernel that predates the htab reset calls. For PR
1463 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1464 * this era, it has allocated a 16MB fixed size hash table
1465 * already. Kernels of this era have the GET_PVINFO capability
1466 * only on PR, so we use this hack to determine the right
1467 * answer */
1468 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1469 /* PR - tell caller to allocate htab */
1470 return 0;
1471 } else {
1472 /* HV - assume 16MB kernel allocated htab */
1473 return 24;
1477 static inline uint32_t mfpvr(void)
1479 uint32_t pvr;
1481 asm ("mfpvr %0"
1482 : "=r"(pvr));
1483 return pvr;
/* Set (on == true) or clear (on == false) the bits of @flags in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1495 static void kvmppc_host_cpu_initfn(Object *obj)
1497 assert(kvm_enabled());
1500 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1502 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1503 uint32_t vmx = kvmppc_get_vmx();
1504 uint32_t dfp = kvmppc_get_dfp();
1506 /* Now fix up the class with information we can query from the host */
1508 if (vmx != -1) {
1509 /* Only override when we know what the host supports */
1510 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1511 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1513 if (dfp != -1) {
1514 /* Only override when we know what the host supports */
1515 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1519 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1521 CPUState *cs = CPU(cpu);
1522 int smt;
1524 /* Adjust cpu index for SMT */
1525 smt = kvmppc_smt_threads();
1526 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1527 + (cs->cpu_index % smp_threads);
1529 return 0;
1532 static int kvm_ppc_register_host_cpu_type(void)
1534 TypeInfo type_info = {
1535 .name = TYPE_HOST_POWERPC_CPU,
1536 .instance_init = kvmppc_host_cpu_initfn,
1537 .class_init = kvmppc_host_cpu_class_init,
1539 uint32_t host_pvr = mfpvr();
1540 PowerPCCPUClass *pvr_pcc;
1542 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1543 if (pvr_pcc == NULL) {
1544 return -1;
1546 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1547 type_register(&type_info);
1548 return 0;
1552 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1554 return true;
1557 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1559 return 1;
/* Unconditionally returns 1; neither argument is inspected. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}