migration: push qemu_savevm_state_cancel out of qemu_savevm_state_*
[qemu.git] / target-ppc / kvm.c
blob9dff7607f16813ad84cd22b2ae2d0b8bf9e994ac
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/spapr.h"
36 #include "hw/sysbus.h"
37 #include "hw/spapr.h"
38 #include "hw/spapr_vio.h"
/* Uncomment to route dprintf() output to stderr. */
//#define DEBUG_KVM

/* dprintf(): printf-style debug trace; expands to nothing unless DEBUG_KVM. */
#ifndef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { } while (0)
#else
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif

/* Host directory that holds one sub-directory per CPU device-tree node. */
#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64 static int cap_one_reg;
66 /* XXX We have a race condition where we actually have a level triggered
67 * interrupt, but the infrastructure can't expose that yet, so the guest
68 * takes but ignores it, goes to sleep and never gets notified that there's
69 * still an interrupt pending.
71 * As a quick workaround, let's just wake up again 20 ms after we injected
72 * an interrupt. That way we can assure that we're always reinjecting
73 * interrupts in case the guest swallowed them.
75 static QEMUTimer *idle_timer;
77 static void kvm_kick_cpu(void *opaque)
79 PowerPCCPU *cpu = opaque;
81 qemu_cpu_kick(CPU(cpu));
84 static int kvm_ppc_register_host_cpu_type(void);
86 int kvm_arch_init(KVMState *s)
88 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
89 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
90 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
91 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
92 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
93 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
94 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
95 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
96 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
98 if (!cap_interrupt_level) {
99 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
100 "VM to stall at times!\n");
103 kvm_ppc_register_host_cpu_type();
105 return 0;
108 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
110 CPUPPCState *cenv = &cpu->env;
111 CPUState *cs = CPU(cpu);
112 struct kvm_sregs sregs;
113 int ret;
115 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
116 /* What we're really trying to say is "if we're on BookE, we use
117 the native PVR for now". This is the only sane way to check
118 it though, so we potentially confuse users that they can run
119 BookE guests on BookS. Let's hope nobody dares enough :) */
120 return 0;
121 } else {
122 if (!cap_segstate) {
123 fprintf(stderr, "kvm error: missing PVR setting capability\n");
124 return -ENOSYS;
128 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
129 if (ret) {
130 return ret;
133 sregs.pvr = cenv->spr[SPR_PVR];
134 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
137 /* Set up a shared TLB array with KVM */
138 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
140 CPUPPCState *env = &cpu->env;
141 CPUState *cs = CPU(cpu);
142 struct kvm_book3e_206_tlb_params params = {};
143 struct kvm_config_tlb cfg = {};
144 struct kvm_enable_cap encap = {};
145 unsigned int entries = 0;
146 int ret, i;
148 if (!kvm_enabled() ||
149 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
150 return 0;
153 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
155 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
156 params.tlb_sizes[i] = booke206_tlb_size(env, i);
157 params.tlb_ways[i] = booke206_tlb_ways(env, i);
158 entries += params.tlb_sizes[i];
161 assert(entries == env->nb_tlb);
162 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
164 env->tlb_dirty = true;
166 cfg.array = (uintptr_t)env->tlb.tlbm;
167 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
168 cfg.params = (uintptr_t)&params;
169 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
171 encap.cap = KVM_CAP_SW_TLB;
172 encap.args[0] = (uintptr_t)&cfg;
174 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
175 if (ret < 0) {
176 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
177 __func__, strerror(-ret));
178 return ret;
181 env->kvm_sw_tlb = true;
182 return 0;
186 #if defined(TARGET_PPC64)
187 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
188 struct kvm_ppc_smmu_info *info)
190 CPUPPCState *env = &cpu->env;
191 CPUState *cs = CPU(cpu);
193 memset(info, 0, sizeof(*info));
195 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
196 * need to "guess" what the supported page sizes are.
198 * For that to work we make a few assumptions:
200 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
201 * KVM which only supports 4K and 16M pages, but supports them
202 * regardless of the backing store characteritics. We also don't
203 * support 1T segments.
205 * This is safe as if HV KVM ever supports that capability or PR
206 * KVM grows supports for more page/segment sizes, those versions
207 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
208 * will not hit this fallback
210 * - Else we are running HV KVM. This means we only support page
211 * sizes that fit in the backing store. Additionally we only
212 * advertize 64K pages if the processor is ARCH 2.06 and we assume
213 * P7 encodings for the SLB and hash table. Here too, we assume
214 * support for any newer processor will mean a kernel that
215 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
216 * this fallback.
218 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
219 /* No flags */
220 info->flags = 0;
221 info->slb_size = 64;
223 /* Standard 4k base page size segment */
224 info->sps[0].page_shift = 12;
225 info->sps[0].slb_enc = 0;
226 info->sps[0].enc[0].page_shift = 12;
227 info->sps[0].enc[0].pte_enc = 0;
229 /* Standard 16M large page size segment */
230 info->sps[1].page_shift = 24;
231 info->sps[1].slb_enc = SLB_VSID_L;
232 info->sps[1].enc[0].page_shift = 24;
233 info->sps[1].enc[0].pte_enc = 0;
234 } else {
235 int i = 0;
237 /* HV KVM has backing store size restrictions */
238 info->flags = KVM_PPC_PAGE_SIZES_REAL;
240 if (env->mmu_model & POWERPC_MMU_1TSEG) {
241 info->flags |= KVM_PPC_1T_SEGMENTS;
244 if (env->mmu_model == POWERPC_MMU_2_06) {
245 info->slb_size = 32;
246 } else {
247 info->slb_size = 64;
250 /* Standard 4k base page size segment */
251 info->sps[i].page_shift = 12;
252 info->sps[i].slb_enc = 0;
253 info->sps[i].enc[0].page_shift = 12;
254 info->sps[i].enc[0].pte_enc = 0;
255 i++;
257 /* 64K on MMU 2.06 */
258 if (env->mmu_model == POWERPC_MMU_2_06) {
259 info->sps[i].page_shift = 16;
260 info->sps[i].slb_enc = 0x110;
261 info->sps[i].enc[0].page_shift = 16;
262 info->sps[i].enc[0].pte_enc = 1;
263 i++;
266 /* Standard 16M large page size segment */
267 info->sps[i].page_shift = 24;
268 info->sps[i].slb_enc = SLB_VSID_L;
269 info->sps[i].enc[0].page_shift = 24;
270 info->sps[i].enc[0].pte_enc = 0;
274 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
276 CPUState *cs = CPU(cpu);
277 int ret;
279 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
280 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
281 if (ret == 0) {
282 return;
286 kvm_get_fallback_smmu_info(cpu, info);
289 static long getrampagesize(void)
291 struct statfs fs;
292 int ret;
294 if (!mem_path) {
295 /* guest RAM is backed by normal anonymous pages */
296 return getpagesize();
299 do {
300 ret = statfs(mem_path, &fs);
301 } while (ret != 0 && errno == EINTR);
303 if (ret != 0) {
304 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
305 strerror(errno));
306 exit(1);
309 #define HUGETLBFS_MAGIC 0x958458f6
311 if (fs.f_type != HUGETLBFS_MAGIC) {
312 /* Explicit mempath, but it's ordinary pages */
313 return getpagesize();
316 /* It's hugepage, return the huge page size */
317 return fs.f_bsize;
320 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
322 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
323 return true;
326 return (1ul << shift) <= rampgsize;
329 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
331 static struct kvm_ppc_smmu_info smmu_info;
332 static bool has_smmu_info;
333 CPUPPCState *env = &cpu->env;
334 long rampagesize;
335 int iq, ik, jq, jk;
337 /* We only handle page sizes for 64-bit server guests for now */
338 if (!(env->mmu_model & POWERPC_MMU_64)) {
339 return;
342 /* Collect MMU info from kernel if not already */
343 if (!has_smmu_info) {
344 kvm_get_smmu_info(cpu, &smmu_info);
345 has_smmu_info = true;
348 rampagesize = getrampagesize();
350 /* Convert to QEMU form */
351 memset(&env->sps, 0, sizeof(env->sps));
353 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
354 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
355 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
357 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
358 ksps->page_shift)) {
359 continue;
361 qsps->page_shift = ksps->page_shift;
362 qsps->slb_enc = ksps->slb_enc;
363 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
364 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
365 ksps->enc[jk].page_shift)) {
366 continue;
368 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
369 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
370 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
371 break;
374 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
375 break;
378 env->slb_nr = smmu_info.slb_size;
379 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
380 env->mmu_model |= POWERPC_MMU_1TSEG;
381 } else {
382 env->mmu_model &= ~POWERPC_MMU_1TSEG;
385 #else /* defined (TARGET_PPC64) */
387 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
391 #endif /* !defined (TARGET_PPC64) */
393 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
395 return cpu->cpu_index;
398 int kvm_arch_init_vcpu(CPUState *cs)
400 PowerPCCPU *cpu = POWERPC_CPU(cs);
401 CPUPPCState *cenv = &cpu->env;
402 int ret;
404 /* Gather server mmu info from KVM and update the CPU state */
405 kvm_fixup_page_sizes(cpu);
407 /* Synchronize sregs with kvm */
408 ret = kvm_arch_sync_sregs(cpu);
409 if (ret) {
410 return ret;
413 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
415 /* Some targets support access to KVM's guest TLB. */
416 switch (cenv->mmu_model) {
417 case POWERPC_MMU_BOOKE206:
418 ret = kvm_booke206_tlb_init(cpu);
419 break;
420 default:
421 break;
424 return ret;
427 void kvm_arch_reset_vcpu(CPUState *cpu)
431 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
433 CPUPPCState *env = &cpu->env;
434 CPUState *cs = CPU(cpu);
435 struct kvm_dirty_tlb dirty_tlb;
436 unsigned char *bitmap;
437 int ret;
439 if (!env->kvm_sw_tlb) {
440 return;
443 bitmap = g_malloc((env->nb_tlb + 7) / 8);
444 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
446 dirty_tlb.bitmap = (uintptr_t)bitmap;
447 dirty_tlb.num_dirty = env->nb_tlb;
449 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
450 if (ret) {
451 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
452 __func__, strerror(-ret));
455 g_free(bitmap);
458 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
460 PowerPCCPU *cpu = POWERPC_CPU(cs);
461 CPUPPCState *env = &cpu->env;
462 union {
463 uint32_t u32;
464 uint64_t u64;
465 } val;
466 struct kvm_one_reg reg = {
467 .id = id,
468 .addr = (uintptr_t) &val,
470 int ret;
472 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
473 if (ret != 0) {
474 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
475 spr, strerror(errno));
476 } else {
477 switch (id & KVM_REG_SIZE_MASK) {
478 case KVM_REG_SIZE_U32:
479 env->spr[spr] = val.u32;
480 break;
482 case KVM_REG_SIZE_U64:
483 env->spr[spr] = val.u64;
484 break;
486 default:
487 /* Don't handle this size yet */
488 abort();
493 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
495 PowerPCCPU *cpu = POWERPC_CPU(cs);
496 CPUPPCState *env = &cpu->env;
497 union {
498 uint32_t u32;
499 uint64_t u64;
500 } val;
501 struct kvm_one_reg reg = {
502 .id = id,
503 .addr = (uintptr_t) &val,
505 int ret;
507 switch (id & KVM_REG_SIZE_MASK) {
508 case KVM_REG_SIZE_U32:
509 val.u32 = env->spr[spr];
510 break;
512 case KVM_REG_SIZE_U64:
513 val.u64 = env->spr[spr];
514 break;
516 default:
517 /* Don't handle this size yet */
518 abort();
521 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
522 if (ret != 0) {
523 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
524 spr, strerror(errno));
528 static int kvm_put_fp(CPUState *cs)
530 PowerPCCPU *cpu = POWERPC_CPU(cs);
531 CPUPPCState *env = &cpu->env;
532 struct kvm_one_reg reg;
533 int i;
534 int ret;
536 if (env->insns_flags & PPC_FLOAT) {
537 uint64_t fpscr = env->fpscr;
538 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
540 reg.id = KVM_REG_PPC_FPSCR;
541 reg.addr = (uintptr_t)&fpscr;
542 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
543 if (ret < 0) {
544 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
545 return ret;
548 for (i = 0; i < 32; i++) {
549 uint64_t vsr[2];
551 vsr[0] = float64_val(env->fpr[i]);
552 vsr[1] = env->vsr[i];
553 reg.addr = (uintptr_t) &vsr;
554 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
556 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
557 if (ret < 0) {
558 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
559 i, strerror(errno));
560 return ret;
565 if (env->insns_flags & PPC_ALTIVEC) {
566 reg.id = KVM_REG_PPC_VSCR;
567 reg.addr = (uintptr_t)&env->vscr;
568 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
569 if (ret < 0) {
570 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
571 return ret;
574 for (i = 0; i < 32; i++) {
575 reg.id = KVM_REG_PPC_VR(i);
576 reg.addr = (uintptr_t)&env->avr[i];
577 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 if (ret < 0) {
579 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
580 return ret;
585 return 0;
588 static int kvm_get_fp(CPUState *cs)
590 PowerPCCPU *cpu = POWERPC_CPU(cs);
591 CPUPPCState *env = &cpu->env;
592 struct kvm_one_reg reg;
593 int i;
594 int ret;
596 if (env->insns_flags & PPC_FLOAT) {
597 uint64_t fpscr;
598 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
600 reg.id = KVM_REG_PPC_FPSCR;
601 reg.addr = (uintptr_t)&fpscr;
602 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
603 if (ret < 0) {
604 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
605 return ret;
606 } else {
607 env->fpscr = fpscr;
610 for (i = 0; i < 32; i++) {
611 uint64_t vsr[2];
613 reg.addr = (uintptr_t) &vsr;
614 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
616 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
617 if (ret < 0) {
618 dprintf("Unable to get %s%d from KVM: %s\n",
619 vsx ? "VSR" : "FPR", i, strerror(errno));
620 return ret;
621 } else {
622 env->fpr[i] = vsr[0];
623 if (vsx) {
624 env->vsr[i] = vsr[1];
630 if (env->insns_flags & PPC_ALTIVEC) {
631 reg.id = KVM_REG_PPC_VSCR;
632 reg.addr = (uintptr_t)&env->vscr;
633 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
634 if (ret < 0) {
635 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
636 return ret;
639 for (i = 0; i < 32; i++) {
640 reg.id = KVM_REG_PPC_VR(i);
641 reg.addr = (uintptr_t)&env->avr[i];
642 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 if (ret < 0) {
644 dprintf("Unable to get VR%d from KVM: %s\n",
645 i, strerror(errno));
646 return ret;
651 return 0;
654 int kvm_arch_put_registers(CPUState *cs, int level)
656 PowerPCCPU *cpu = POWERPC_CPU(cs);
657 CPUPPCState *env = &cpu->env;
658 struct kvm_regs regs;
659 int ret;
660 int i;
662 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
663 if (ret < 0) {
664 return ret;
667 regs.ctr = env->ctr;
668 regs.lr = env->lr;
669 regs.xer = cpu_read_xer(env);
670 regs.msr = env->msr;
671 regs.pc = env->nip;
673 regs.srr0 = env->spr[SPR_SRR0];
674 regs.srr1 = env->spr[SPR_SRR1];
676 regs.sprg0 = env->spr[SPR_SPRG0];
677 regs.sprg1 = env->spr[SPR_SPRG1];
678 regs.sprg2 = env->spr[SPR_SPRG2];
679 regs.sprg3 = env->spr[SPR_SPRG3];
680 regs.sprg4 = env->spr[SPR_SPRG4];
681 regs.sprg5 = env->spr[SPR_SPRG5];
682 regs.sprg6 = env->spr[SPR_SPRG6];
683 regs.sprg7 = env->spr[SPR_SPRG7];
685 regs.pid = env->spr[SPR_BOOKE_PID];
687 for (i = 0;i < 32; i++)
688 regs.gpr[i] = env->gpr[i];
690 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
691 if (ret < 0)
692 return ret;
694 kvm_put_fp(cs);
696 if (env->tlb_dirty) {
697 kvm_sw_tlb_put(cpu);
698 env->tlb_dirty = false;
701 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
702 struct kvm_sregs sregs;
704 sregs.pvr = env->spr[SPR_PVR];
706 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
708 /* Sync SLB */
709 #ifdef TARGET_PPC64
710 for (i = 0; i < 64; i++) {
711 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
712 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
714 #endif
716 /* Sync SRs */
717 for (i = 0; i < 16; i++) {
718 sregs.u.s.ppc32.sr[i] = env->sr[i];
721 /* Sync BATs */
722 for (i = 0; i < 8; i++) {
723 /* Beware. We have to swap upper and lower bits here */
724 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
725 | env->DBAT[1][i];
726 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
727 | env->IBAT[1][i];
730 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
731 if (ret) {
732 return ret;
736 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
737 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
740 if (cap_one_reg) {
741 int i;
743 /* We deliberately ignore errors here, for kernels which have
744 * the ONE_REG calls, but don't support the specific
745 * registers, there's a reasonable chance things will still
746 * work, at least until we try to migrate. */
747 for (i = 0; i < 1024; i++) {
748 uint64_t id = env->spr_cb[i].one_reg_id;
750 if (id != 0) {
751 kvm_put_one_spr(cs, id, i);
756 return ret;
759 int kvm_arch_get_registers(CPUState *cs)
761 PowerPCCPU *cpu = POWERPC_CPU(cs);
762 CPUPPCState *env = &cpu->env;
763 struct kvm_regs regs;
764 struct kvm_sregs sregs;
765 uint32_t cr;
766 int i, ret;
768 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
769 if (ret < 0)
770 return ret;
772 cr = regs.cr;
773 for (i = 7; i >= 0; i--) {
774 env->crf[i] = cr & 15;
775 cr >>= 4;
778 env->ctr = regs.ctr;
779 env->lr = regs.lr;
780 cpu_write_xer(env, regs.xer);
781 env->msr = regs.msr;
782 env->nip = regs.pc;
784 env->spr[SPR_SRR0] = regs.srr0;
785 env->spr[SPR_SRR1] = regs.srr1;
787 env->spr[SPR_SPRG0] = regs.sprg0;
788 env->spr[SPR_SPRG1] = regs.sprg1;
789 env->spr[SPR_SPRG2] = regs.sprg2;
790 env->spr[SPR_SPRG3] = regs.sprg3;
791 env->spr[SPR_SPRG4] = regs.sprg4;
792 env->spr[SPR_SPRG5] = regs.sprg5;
793 env->spr[SPR_SPRG6] = regs.sprg6;
794 env->spr[SPR_SPRG7] = regs.sprg7;
796 env->spr[SPR_BOOKE_PID] = regs.pid;
798 for (i = 0;i < 32; i++)
799 env->gpr[i] = regs.gpr[i];
801 kvm_get_fp(cs);
803 if (cap_booke_sregs) {
804 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
805 if (ret < 0) {
806 return ret;
809 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
810 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
811 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
812 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
813 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
814 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
815 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
816 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
817 env->spr[SPR_DECR] = sregs.u.e.dec;
818 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
819 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
820 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
823 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
824 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
825 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
826 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
827 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
828 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
831 if (sregs.u.e.features & KVM_SREGS_E_64) {
832 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
835 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
836 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
839 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
840 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
841 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
842 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
843 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
844 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
845 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
846 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
847 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
848 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
849 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
850 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
851 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
852 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
853 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
854 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
855 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
857 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
858 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
859 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
860 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
863 if (sregs.u.e.features & KVM_SREGS_E_PM) {
864 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
867 if (sregs.u.e.features & KVM_SREGS_E_PC) {
868 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
869 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
873 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
874 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
875 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
876 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
877 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
878 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
879 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
880 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
881 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
882 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
883 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
886 if (sregs.u.e.features & KVM_SREGS_EXP) {
887 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
890 if (sregs.u.e.features & KVM_SREGS_E_PD) {
891 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
892 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
895 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
896 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
897 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
898 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
900 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
901 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
902 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
907 if (cap_segstate) {
908 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
909 if (ret < 0) {
910 return ret;
913 ppc_store_sdr1(env, sregs.u.s.sdr1);
915 /* Sync SLB */
916 #ifdef TARGET_PPC64
917 for (i = 0; i < 64; i++) {
918 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
919 sregs.u.s.ppc64.slb[i].slbv);
921 #endif
923 /* Sync SRs */
924 for (i = 0; i < 16; i++) {
925 env->sr[i] = sregs.u.s.ppc32.sr[i];
928 /* Sync BATs */
929 for (i = 0; i < 8; i++) {
930 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
931 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
932 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
933 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
937 if (cap_hior) {
938 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
941 if (cap_one_reg) {
942 int i;
944 /* We deliberately ignore errors here, for kernels which have
945 * the ONE_REG calls, but don't support the specific
946 * registers, there's a reasonable chance things will still
947 * work, at least until we try to migrate. */
948 for (i = 0; i < 1024; i++) {
949 uint64_t id = env->spr_cb[i].one_reg_id;
951 if (id != 0) {
952 kvm_get_one_spr(cs, id, i);
957 return 0;
960 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
962 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
964 if (irq != PPC_INTERRUPT_EXT) {
965 return 0;
968 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
969 return 0;
972 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
974 return 0;
977 #if defined(TARGET_PPCEMB)
978 #define PPC_INPUT_INT PPC40x_INPUT_INT
979 #elif defined(TARGET_PPC64)
980 #define PPC_INPUT_INT PPC970_INPUT_INT
981 #else
982 #define PPC_INPUT_INT PPC6xx_INPUT_INT
983 #endif
985 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
987 PowerPCCPU *cpu = POWERPC_CPU(cs);
988 CPUPPCState *env = &cpu->env;
989 int r;
990 unsigned irq;
992 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
993 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
994 if (!cap_interrupt_level &&
995 run->ready_for_interrupt_injection &&
996 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
997 (env->irq_input_state & (1<<PPC_INPUT_INT)))
999 /* For now KVM disregards the 'irq' argument. However, in the
1000 * future KVM could cache it in-kernel to avoid a heavyweight exit
1001 * when reading the UIC.
1003 irq = KVM_INTERRUPT_SET;
1005 dprintf("injected interrupt %d\n", irq);
1006 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1007 if (r < 0) {
1008 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1011 /* Always wake up soon in case the interrupt was level based */
1012 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1013 (get_ticks_per_sec() / 50));
1016 /* We don't know if there are more interrupts pending after this. However,
1017 * the guest will return to userspace in the course of handling this one
1018 * anyways, so we will get a chance to deliver the rest. */
1021 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1025 int kvm_arch_process_async_events(CPUState *cs)
1027 PowerPCCPU *cpu = POWERPC_CPU(cs);
1028 return cpu->env.halted;
1031 static int kvmppc_handle_halt(CPUPPCState *env)
1033 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1034 env->halted = 1;
1035 env->exception_index = EXCP_HLT;
1038 return 0;
1041 /* map dcr access to existing qemu dcr emulation */
1042 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1044 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1045 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1047 return 0;
1050 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1052 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1053 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1055 return 0;
1058 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1060 PowerPCCPU *cpu = POWERPC_CPU(cs);
1061 CPUPPCState *env = &cpu->env;
1062 int ret;
1064 switch (run->exit_reason) {
1065 case KVM_EXIT_DCR:
1066 if (run->dcr.is_write) {
1067 dprintf("handle dcr write\n");
1068 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1069 } else {
1070 dprintf("handle dcr read\n");
1071 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1073 break;
1074 case KVM_EXIT_HLT:
1075 dprintf("handle halt\n");
1076 ret = kvmppc_handle_halt(env);
1077 break;
1078 #ifdef CONFIG_PSERIES
1079 case KVM_EXIT_PAPR_HCALL:
1080 dprintf("handle PAPR hypercall\n");
1081 run->papr_hcall.ret = spapr_hypercall(cpu,
1082 run->papr_hcall.nr,
1083 run->papr_hcall.args);
1084 ret = 0;
1085 break;
1086 #endif
1087 case KVM_EXIT_EPR:
1088 dprintf("handle epr\n");
1089 run->epr.epr = ldl_phys(env->mpic_iack);
1090 ret = 0;
1091 break;
1092 default:
1093 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1094 ret = -1;
1095 break;
1098 return ret;
/*
 * Copy the first line of /proc/cpuinfo that starts with @field into @value
 * (at most @len bytes, via pstrcpy).  Returns 0 when such a line was found,
 * -1 when the file cannot be opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int found = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (fp == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* An empty line buffer also ends the scan. */
        if (*line == '\0') {
            break;
        }
    }

    fclose(fp);

    return found;
}
/*
 * Return the number found after the ':' on the "timebase" line of
 * /proc/cpuinfo, or get_ticks_per_sec() when that line or the ':' is missing.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    const char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line)) != 0) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (colon == NULL) {
        return fallback;
    }

    /* The value starts right after the colon. */
    return atoi(colon + 1);
}
1149 /* Try to find a device tree node for a CPU with clock-frequency property */
1150 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1152 struct dirent *dirp;
1153 DIR *dp;
1155 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1156 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1157 return -1;
1160 buf[0] = '\0';
1161 while ((dirp = readdir(dp)) != NULL) {
1162 FILE *f;
1163 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1164 dirp->d_name);
1165 f = fopen(buf, "r");
1166 if (f) {
1167 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1168 fclose(f);
1169 break;
1171 buf[0] = '\0';
1173 closedir(dp);
1174 if (buf[0] == '\0') {
1175 printf("Unknown host!\n");
1176 return -1;
1179 return 0;
1182 /* Read a CPU node property from the host device tree that's a single
1183 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1184 * (can't find or open the property, or doesn't understand the
1185 * format) */
1186 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1188 char buf[PATH_MAX];
1189 union {
1190 uint32_t v32;
1191 uint64_t v64;
1192 } u;
1193 FILE *f;
1194 int len;
1196 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1197 return -1;
1200 strncat(buf, "/", sizeof(buf) - strlen(buf));
1201 strncat(buf, propname, sizeof(buf) - strlen(buf));
1203 f = fopen(buf, "rb");
1204 if (!f) {
1205 return -1;
1208 len = fread(&u, 1, sizeof(u), f);
1209 fclose(f);
1210 switch (len) {
1211 case 4:
1212 /* property is a 32-bit quantity */
1213 return be32_to_cpu(u.v32);
1214 case 8:
1215 return be64_to_cpu(u.v64);
1218 return 0;
/* Value of the host CPU node's "clock-frequency" property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Value of the host CPU node's "ibm,vmx" property, truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
/* Value of the host CPU node's "ibm,dfp" property, truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
1236 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1238 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1239 CPUState *cs = CPU(cpu);
1241 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1242 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1243 return 0;
1246 return 1;
1249 int kvmppc_get_hasidle(CPUPPCState *env)
1251 struct kvm_ppc_pvinfo pvinfo;
1253 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1254 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1255 return 1;
1258 return 0;
1261 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1263 uint32_t *hc = (uint32_t*)buf;
1264 struct kvm_ppc_pvinfo pvinfo;
1266 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1267 memcpy(buf, pvinfo.hcall, buf_len);
1268 return 0;
1272 * Fallback to always fail hypercalls:
1274 * li r3, -1
1275 * nop
1276 * nop
1277 * nop
1280 hc[0] = 0x3860ffff;
1281 hc[1] = 0x60000000;
1282 hc[2] = 0x60000000;
1283 hc[3] = 0x60000000;
1285 return 0;
1288 void kvmppc_set_papr(PowerPCCPU *cpu)
1290 CPUPPCState *env = &cpu->env;
1291 CPUState *cs = CPU(cpu);
1292 struct kvm_enable_cap cap = {};
1293 int ret;
1295 cap.cap = KVM_CAP_PPC_PAPR;
1296 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1298 if (ret) {
1299 cpu_abort(env, "This KVM version does not support PAPR\n");
1303 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1305 CPUPPCState *env = &cpu->env;
1306 CPUState *cs = CPU(cpu);
1307 struct kvm_enable_cap cap = {};
1308 int ret;
1310 cap.cap = KVM_CAP_PPC_EPR;
1311 cap.args[0] = mpic_proxy;
1312 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1314 if (ret && mpic_proxy) {
1315 cpu_abort(env, "This KVM version does not support EPR\n");
1319 int kvmppc_smt_threads(void)
1321 return cap_ppc_smt ? cap_ppc_smt : 1;
1324 #ifdef TARGET_PPC64
1325 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1327 void *rma;
1328 off_t size;
1329 int fd;
1330 struct kvm_allocate_rma ret;
1331 MemoryRegion *rma_region;
1333 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1334 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1335 * not necessary on this hardware
1336 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1338 * FIXME: We should allow the user to force contiguous RMA
1339 * allocation in the cap_ppc_rma==1 case.
1341 if (cap_ppc_rma < 2) {
1342 return 0;
1345 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1346 if (fd < 0) {
1347 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1348 strerror(errno));
1349 return -1;
1352 size = MIN(ret.rma_size, 256ul << 20);
1354 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1355 if (rma == MAP_FAILED) {
1356 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1357 return -1;
1360 rma_region = g_new(MemoryRegion, 1);
1361 memory_region_init_ram_ptr(rma_region, name, size, rma);
1362 vmstate_register_ram_global(rma_region);
1363 memory_region_add_subregion(sysmem, 0, rma_region);
1365 return size;
1368 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1370 if (cap_ppc_rma >= 2) {
1371 return current_size;
1373 return MIN(current_size,
1374 getrampagesize() << (hash_shift - 7));
1376 #endif
1378 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1380 struct kvm_create_spapr_tce args = {
1381 .liobn = liobn,
1382 .window_size = window_size,
1384 long len;
1385 int fd;
1386 void *table;
1388 /* Must set fd to -1 so we don't try to munmap when called for
1389 * destroying the table, which the upper layers -will- do
1391 *pfd = -1;
1392 if (!cap_spapr_tce) {
1393 return NULL;
1396 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1397 if (fd < 0) {
1398 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1399 liobn);
1400 return NULL;
1403 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1404 /* FIXME: round this up to page size */
1406 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1407 if (table == MAP_FAILED) {
1408 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1409 liobn);
1410 close(fd);
1411 return NULL;
1414 *pfd = fd;
1415 return table;
1418 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1420 long len;
1422 if (fd < 0) {
1423 return -1;
1426 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1427 if ((munmap(table, len) < 0) ||
1428 (close(fd) < 0)) {
1429 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1430 strerror(errno));
1431 /* Leak the table */
1434 return 0;
1437 int kvmppc_reset_htab(int shift_hint)
1439 uint32_t shift = shift_hint;
1441 if (!kvm_enabled()) {
1442 /* Full emulation, tell caller to allocate htab itself */
1443 return 0;
1445 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1446 int ret;
1447 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1448 if (ret == -ENOTTY) {
1449 /* At least some versions of PR KVM advertise the
1450 * capability, but don't implement the ioctl(). Oops.
1451 * Return 0 so that we allocate the htab in qemu, as is
1452 * correct for PR. */
1453 return 0;
1454 } else if (ret < 0) {
1455 return ret;
1457 return shift;
1460 /* We have a kernel that predates the htab reset calls. For PR
1461 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1462 * this era, it has allocated a 16MB fixed size hash table
1463 * already. Kernels of this era have the GET_PVINFO capability
1464 * only on PR, so we use this hack to determine the right
1465 * answer */
1466 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1467 /* PR - tell caller to allocate htab */
1468 return 0;
1469 } else {
1470 /* HV - assume 16MB kernel allocated htab */
1471 return 24;
/* Execute the "mfpvr" instruction and return the register value it yields. */
static inline uint32_t mfpvr(void)
{
    uint32_t value;

    __asm__ ("mfpvr %0" : "=r" (value));

    return value;
}
/* Set the bits of @flags in *@word when @on is true, clear them otherwise. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1493 static void kvmppc_host_cpu_initfn(Object *obj)
1495 assert(kvm_enabled());
1498 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1500 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1501 uint32_t vmx = kvmppc_get_vmx();
1502 uint32_t dfp = kvmppc_get_dfp();
1504 /* Now fix up the class with information we can query from the host */
1506 if (vmx != -1) {
1507 /* Only override when we know what the host supports */
1508 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1509 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1511 if (dfp != -1) {
1512 /* Only override when we know what the host supports */
1513 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1517 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1519 CPUState *cs = CPU(cpu);
1520 int smt;
1522 /* Adjust cpu index for SMT */
1523 smt = kvmppc_smt_threads();
1524 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1525 + (cs->cpu_index % smp_threads);
1527 return 0;
1530 static int kvm_ppc_register_host_cpu_type(void)
1532 TypeInfo type_info = {
1533 .name = TYPE_HOST_POWERPC_CPU,
1534 .instance_init = kvmppc_host_cpu_initfn,
1535 .class_init = kvmppc_host_cpu_class_init,
1537 uint32_t host_pvr = mfpvr();
1538 PowerPCCPUClass *pvr_pcc;
1540 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1541 if (pvr_pcc == NULL) {
1542 return -1;
1544 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1545 type_register(&type_info);
1546 return 0;
1550 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1552 return true;
1555 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1557 return 1;
/* Unconditionally returns 1; none of the arguments are inspected. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}