[qemu/ar7.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "hw/sysbus.h"
34 #include "hw/ppc/spapr.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
41 //#define DEBUG_KVM
43 #ifdef DEBUG_KVM
44 #define dprintf(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #else
47 #define dprintf(fmt, ...) \
48 do { } while (0)
49 #endif
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
54 KVM_CAP_LAST_INFO
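/* Capabilities probed once from the running kernel in kvm_arch_init()
 * and cached in these flags for use throughout this file.
 */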
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
64 static int cap_hior;
65 static int cap_one_reg;
67 /* XXX We have a race condition where we actually have a level-triggered
68 * interrupt, but the infrastructure can't expose that yet, so the guest
69 * takes it but ignores it, goes to sleep and never gets notified that there's
70 * still an interrupt pending.
72 * As a quick workaround, let's just wake up again 20 ms after we injected
73 * an interrupt. That way we can assure that we're always reinjecting
74 * interrupts in case the guest swallowed them.
76 static QEMUTimer *idle_timer;
78 static void kvm_kick_cpu(void *opaque)
80 PowerPCCPU *cpu = opaque;
82 qemu_cpu_kick(CPU(cpu));
85 static int kvm_ppc_register_host_cpu_type(void);
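/* Runs once when the KVM accelerator is initialized: probe the KVM
 * capabilities used later in this file and register the "host" CPU type.
 */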
87 int kvm_arch_init(KVMState *s)
89 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
90 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
91 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
92 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
93 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
94 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
95 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
96 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
97 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
99 if (!cap_interrupt_level) {
100 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
101 "VM to stall at times!\n");
104 kvm_ppc_register_host_cpu_type();
106 return 0;
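/* Push the guest PVR to the kernel via KVM_SET_SREGS so the in-kernel
 * vCPU matches the modelled CPU. Skipped on BookE, where the host PVR
 * is used as-is.
 */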
109 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
111 CPUPPCState *cenv = &cpu->env;
112 CPUState *cs = CPU(cpu);
113 struct kvm_sregs sregs;
114 int ret;
116 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
117 /* What we're really trying to say is "if we're on BookE, we use
118 the native PVR for now". This is the only sane way to check
119 it though, so we may mislead users into thinking they can run
120 BookE guests on BookS. Let's hope nobody dares to try :) */
121 return 0;
122 } else {
123 if (!cap_segstate) {
124 fprintf(stderr, "kvm error: missing PVR setting capability\n");
125 return -ENOSYS;
129 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
130 if (ret) {
131 return ret;
134 sregs.pvr = cenv->spr[SPR_PVR];
135 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
138 /* Set up a shared TLB array with KVM */
139 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
141 CPUPPCState *env = &cpu->env;
142 CPUState *cs = CPU(cpu);
143 struct kvm_book3e_206_tlb_params params = {};
144 struct kvm_config_tlb cfg = {};
145 struct kvm_enable_cap encap = {};
146 unsigned int entries = 0;
147 int ret, i;
149 if (!kvm_enabled() ||
150 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
151 return 0;
154 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
156 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
157 params.tlb_sizes[i] = booke206_tlb_size(env, i);
158 params.tlb_ways[i] = booke206_tlb_ways(env, i);
159 entries += params.tlb_sizes[i];
162 assert(entries == env->nb_tlb);
163 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
165 env->tlb_dirty = true;
167 cfg.array = (uintptr_t)env->tlb.tlbm;
168 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
169 cfg.params = (uintptr_t)&params;
170 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
172 encap.cap = KVM_CAP_SW_TLB;
173 encap.args[0] = (uintptr_t)&cfg;
175 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
176 if (ret < 0) {
177 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
178 __func__, strerror(-ret));
179 return ret;
182 env->kvm_sw_tlb = true;
183 return 0;
187 #if defined(TARGET_PPC64)
188 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
189 struct kvm_ppc_smmu_info *info)
191 CPUPPCState *env = &cpu->env;
192 CPUState *cs = CPU(cpu);
194 memset(info, 0, sizeof(*info));
196 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
197 * need to "guess" what the supported page sizes are.
199 * For that to work we make a few assumptions:
201 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
202 * KVM which only supports 4K and 16M pages, but supports them
203 * regardless of the backing store characteristics. We also don't
204 * support 1T segments.
206 * This is safe as if HV KVM ever supports that capability or PR
207 * KVM grows support for more page/segment sizes, those versions
208 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
209 * will not hit this fallback
211 * - Else we are running HV KVM. This means we only support page
212 * sizes that fit in the backing store. Additionally we only
213 * advertise 64K pages if the processor is ARCH 2.06 and we assume
214 * P7 encodings for the SLB and hash table. Here too, we assume
215 * support for any newer processor will mean a kernel that
216 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
217 * this fallback.
219 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
220 /* No flags */
221 info->flags = 0;
222 info->slb_size = 64;
224 /* Standard 4k base page size segment */
225 info->sps[0].page_shift = 12;
226 info->sps[0].slb_enc = 0;
227 info->sps[0].enc[0].page_shift = 12;
228 info->sps[0].enc[0].pte_enc = 0;
230 /* Standard 16M large page size segment */
231 info->sps[1].page_shift = 24;
232 info->sps[1].slb_enc = SLB_VSID_L;
233 info->sps[1].enc[0].page_shift = 24;
234 info->sps[1].enc[0].pte_enc = 0;
235 } else {
236 int i = 0;
238 /* HV KVM has backing store size restrictions */
239 info->flags = KVM_PPC_PAGE_SIZES_REAL;
241 if (env->mmu_model & POWERPC_MMU_1TSEG) {
242 info->flags |= KVM_PPC_1T_SEGMENTS;
245 if (env->mmu_model == POWERPC_MMU_2_06) {
246 info->slb_size = 32;
247 } else {
248 info->slb_size = 64;
251 /* Standard 4k base page size segment */
252 info->sps[i].page_shift = 12;
253 info->sps[i].slb_enc = 0;
254 info->sps[i].enc[0].page_shift = 12;
255 info->sps[i].enc[0].pte_enc = 0;
256 i++;
258 /* 64K on MMU 2.06 */
259 if (env->mmu_model == POWERPC_MMU_2_06) {
260 info->sps[i].page_shift = 16;
261 info->sps[i].slb_enc = 0x110;
262 info->sps[i].enc[0].page_shift = 16;
263 info->sps[i].enc[0].pte_enc = 1;
264 i++;
267 /* Standard 16M large page size segment */
268 info->sps[i].page_shift = 24;
269 info->sps[i].slb_enc = SLB_VSID_L;
270 info->sps[i].enc[0].page_shift = 24;
271 info->sps[i].enc[0].pte_enc = 0;
275 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
277 CPUState *cs = CPU(cpu);
278 int ret;
280 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
281 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
282 if (ret == 0) {
283 return;
287 kvm_get_fallback_smmu_info(cpu, info);
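/* Size of the pages backing guest RAM: the normal host page size, or the
 * hugepage size when -mem-path points at a hugetlbfs mount.
 */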
290 static long getrampagesize(void)
292 struct statfs fs;
293 int ret;
295 if (!mem_path) {
296 /* guest RAM is backed by normal anonymous pages */
297 return getpagesize();
300 do {
301 ret = statfs(mem_path, &fs);
302 } while (ret != 0 && errno == EINTR);
304 if (ret != 0) {
305 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
306 strerror(errno));
307 exit(1);
310 #define HUGETLBFS_MAGIC 0x958458f6
312 if (fs.f_type != HUGETLBFS_MAGIC) {
313 /* Explicit mempath, but it's ordinary pages */
314 return getpagesize();
317 /* It's hugetlbfs, so return the hugepage size */
318 return fs.f_bsize;
321 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
323 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
324 return true;
327 return (1ul << shift) <= rampgsize;
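/* Filter the segment/page-size combinations advertised by KVM down to
 * those usable with the current RAM backing, and copy the result into
 * env->sps along with the SLB size and 1T-segment support.
 */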
330 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
332 static struct kvm_ppc_smmu_info smmu_info;
333 static bool has_smmu_info;
334 CPUPPCState *env = &cpu->env;
335 long rampagesize;
336 int iq, ik, jq, jk;
338 /* We only handle page sizes for 64-bit server guests for now */
339 if (!(env->mmu_model & POWERPC_MMU_64)) {
340 return;
343 /* Collect MMU info from the kernel if we haven't already */
344 if (!has_smmu_info) {
345 kvm_get_smmu_info(cpu, &smmu_info);
346 has_smmu_info = true;
349 rampagesize = getrampagesize();
351 /* Convert to QEMU form */
352 memset(&env->sps, 0, sizeof(env->sps));
354 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
355 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
356 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
358 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
359 ksps->page_shift)) {
360 continue;
362 qsps->page_shift = ksps->page_shift;
363 qsps->slb_enc = ksps->slb_enc;
364 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
365 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
366 ksps->enc[jk].page_shift)) {
367 continue;
369 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
370 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
371 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
372 break;
375 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
376 break;
379 env->slb_nr = smmu_info.slb_size;
380 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
381 env->mmu_model |= POWERPC_MMU_1TSEG;
382 } else {
383 env->mmu_model &= ~POWERPC_MMU_1TSEG;
386 #else /* defined (TARGET_PPC64) */
388 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
392 #endif /* !defined (TARGET_PPC64) */
394 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
396 return cpu->cpu_index;
399 int kvm_arch_init_vcpu(CPUState *cs)
401 PowerPCCPU *cpu = POWERPC_CPU(cs);
402 CPUPPCState *cenv = &cpu->env;
403 int ret;
405 /* Gather server mmu info from KVM and update the CPU state */
406 kvm_fixup_page_sizes(cpu);
408 /* Synchronize sregs with kvm */
409 ret = kvm_arch_sync_sregs(cpu);
410 if (ret) {
411 return ret;
414 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
416 /* Some targets support access to KVM's guest TLB. */
417 switch (cenv->mmu_model) {
418 case POWERPC_MMU_BOOKE206:
419 ret = kvm_booke206_tlb_init(cpu);
420 break;
421 default:
422 break;
425 return ret;
428 void kvm_arch_reset_vcpu(CPUState *cpu)
432 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
434 CPUPPCState *env = &cpu->env;
435 CPUState *cs = CPU(cpu);
436 struct kvm_dirty_tlb dirty_tlb;
437 unsigned char *bitmap;
438 int ret;
440 if (!env->kvm_sw_tlb) {
441 return;
444 bitmap = g_malloc((env->nb_tlb + 7) / 8);
445 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
447 dirty_tlb.bitmap = (uintptr_t)bitmap;
448 dirty_tlb.num_dirty = env->nb_tlb;
450 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
451 if (ret) {
452 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
453 __func__, strerror(-ret));
456 g_free(bitmap);
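/* Read or write a single SPR through the ONE_REG interface. The transfer
 * width is taken from the size field encoded in the register id.
 */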
459 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
461 PowerPCCPU *cpu = POWERPC_CPU(cs);
462 CPUPPCState *env = &cpu->env;
463 union {
464 uint32_t u32;
465 uint64_t u64;
466 } val;
467 struct kvm_one_reg reg = {
468 .id = id,
469 .addr = (uintptr_t) &val,
471 int ret;
473 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
474 if (ret != 0) {
475 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
476 spr, strerror(errno));
477 } else {
478 switch (id & KVM_REG_SIZE_MASK) {
479 case KVM_REG_SIZE_U32:
480 env->spr[spr] = val.u32;
481 break;
483 case KVM_REG_SIZE_U64:
484 env->spr[spr] = val.u64;
485 break;
487 default:
488 /* Don't handle this size yet */
489 abort();
494 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
496 PowerPCCPU *cpu = POWERPC_CPU(cs);
497 CPUPPCState *env = &cpu->env;
498 union {
499 uint32_t u32;
500 uint64_t u64;
501 } val;
502 struct kvm_one_reg reg = {
503 .id = id,
504 .addr = (uintptr_t) &val,
506 int ret;
508 switch (id & KVM_REG_SIZE_MASK) {
509 case KVM_REG_SIZE_U32:
510 val.u32 = env->spr[spr];
511 break;
513 case KVM_REG_SIZE_U64:
514 val.u64 = env->spr[spr];
515 break;
517 default:
518 /* Don't handle this size yet */
519 abort();
522 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
523 if (ret != 0) {
524 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
525 spr, strerror(errno));
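/* Transfer floating point, VSX and Altivec state to KVM. FPRs and VSRs
 * are moved as two-doubleword pairs: vsr[0] carries the FPR value and
 * vsr[1] the VSX extension, with the register id chosen by whether the
 * CPU implements VSX.
 */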
529 static int kvm_put_fp(CPUState *cs)
531 PowerPCCPU *cpu = POWERPC_CPU(cs);
532 CPUPPCState *env = &cpu->env;
533 struct kvm_one_reg reg;
534 int i;
535 int ret;
537 if (env->insns_flags & PPC_FLOAT) {
538 uint64_t fpscr = env->fpscr;
539 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
541 reg.id = KVM_REG_PPC_FPSCR;
542 reg.addr = (uintptr_t)&fpscr;
543 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
544 if (ret < 0) {
545 dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
546 return ret;
549 for (i = 0; i < 32; i++) {
550 uint64_t vsr[2];
552 vsr[0] = float64_val(env->fpr[i]);
553 vsr[1] = env->vsr[i];
554 reg.addr = (uintptr_t) &vsr;
555 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
557 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
558 if (ret < 0) {
559 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
560 i, strerror(errno));
561 return ret;
566 if (env->insns_flags & PPC_ALTIVEC) {
567 reg.id = KVM_REG_PPC_VSCR;
568 reg.addr = (uintptr_t)&env->vscr;
569 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
570 if (ret < 0) {
571 dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
572 return ret;
575 for (i = 0; i < 32; i++) {
576 reg.id = KVM_REG_PPC_VR(i);
577 reg.addr = (uintptr_t)&env->avr[i];
578 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
579 if (ret < 0) {
580 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
581 return ret;
586 return 0;
589 static int kvm_get_fp(CPUState *cs)
591 PowerPCCPU *cpu = POWERPC_CPU(cs);
592 CPUPPCState *env = &cpu->env;
593 struct kvm_one_reg reg;
594 int i;
595 int ret;
597 if (env->insns_flags & PPC_FLOAT) {
598 uint64_t fpscr;
599 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
601 reg.id = KVM_REG_PPC_FPSCR;
602 reg.addr = (uintptr_t)&fpscr;
603 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
604 if (ret < 0) {
605 dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
606 return ret;
607 } else {
608 env->fpscr = fpscr;
611 for (i = 0; i < 32; i++) {
612 uint64_t vsr[2];
614 reg.addr = (uintptr_t) &vsr;
615 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
617 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
618 if (ret < 0) {
619 dprintf("Unable to get %s%d from KVM: %s\n",
620 vsx ? "VSR" : "FPR", i, strerror(errno));
621 return ret;
622 } else {
623 env->fpr[i] = vsr[0];
624 if (vsx) {
625 env->vsr[i] = vsr[1];
631 if (env->insns_flags & PPC_ALTIVEC) {
632 reg.id = KVM_REG_PPC_VSCR;
633 reg.addr = (uintptr_t)&env->vscr;
634 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
635 if (ret < 0) {
636 dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
637 return ret;
640 for (i = 0; i < 32; i++) {
641 reg.id = KVM_REG_PPC_VR(i);
642 reg.addr = (uintptr_t)&env->avr[i];
643 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
644 if (ret < 0) {
645 dprintf("Unable to get VR%d from KVM: %s\n",
646 i, strerror(errno));
647 return ret;
652 return 0;
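/* Copy QEMU's CPU state into the kernel vCPU: GPRs and core SPRs via
 * KVM_SET_REGS, FP/vector state, the shared TLB when dirty, MMU state
 * (SDR1, SLB, SRs, BATs) via KVM_SET_SREGS, and every SPR that has a
 * one_reg_id via the ONE_REG interface.
 */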
655 int kvm_arch_put_registers(CPUState *cs, int level)
657 PowerPCCPU *cpu = POWERPC_CPU(cs);
658 CPUPPCState *env = &cpu->env;
659 struct kvm_regs regs;
660 int ret;
661 int i;
663 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
664 if (ret < 0) {
665 return ret;
668 regs.ctr = env->ctr;
669 regs.lr = env->lr;
670 regs.xer = cpu_read_xer(env);
671 regs.msr = env->msr;
672 regs.pc = env->nip;
674 regs.srr0 = env->spr[SPR_SRR0];
675 regs.srr1 = env->spr[SPR_SRR1];
677 regs.sprg0 = env->spr[SPR_SPRG0];
678 regs.sprg1 = env->spr[SPR_SPRG1];
679 regs.sprg2 = env->spr[SPR_SPRG2];
680 regs.sprg3 = env->spr[SPR_SPRG3];
681 regs.sprg4 = env->spr[SPR_SPRG4];
682 regs.sprg5 = env->spr[SPR_SPRG5];
683 regs.sprg6 = env->spr[SPR_SPRG6];
684 regs.sprg7 = env->spr[SPR_SPRG7];
686 regs.pid = env->spr[SPR_BOOKE_PID];
688 for (i = 0; i < 32; i++)
689 regs.gpr[i] = env->gpr[i];
691 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
692 if (ret < 0)
693 return ret;
695 kvm_put_fp(cs);
697 if (env->tlb_dirty) {
698 kvm_sw_tlb_put(cpu);
699 env->tlb_dirty = false;
702 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
703 struct kvm_sregs sregs;
705 sregs.pvr = env->spr[SPR_PVR];
707 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
709 /* Sync SLB */
710 #ifdef TARGET_PPC64
711 for (i = 0; i < 64; i++) {
712 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
713 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
715 #endif
717 /* Sync SRs */
718 for (i = 0; i < 16; i++) {
719 sregs.u.s.ppc32.sr[i] = env->sr[i];
722 /* Sync BATs */
723 for (i = 0; i < 8; i++) {
724 /* Beware. We have to swap upper and lower bits here */
725 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
726 | env->DBAT[1][i];
727 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
728 | env->IBAT[1][i];
731 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
732 if (ret) {
733 return ret;
737 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
738 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
741 if (cap_one_reg) {
742 int i;
744 /* We deliberately ignore errors here: for kernels which have
745 * the ONE_REG calls but don't support the specific
746 * registers, there's a reasonable chance things will still
747 * work, at least until we try to migrate. */
748 for (i = 0; i < 1024; i++) {
749 uint64_t id = env->spr_cb[i].one_reg_id;
751 if (id != 0) {
752 kvm_put_one_spr(cs, id, i);
757 return ret;
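/* The reverse of kvm_arch_put_registers(): pull registers, FP/vector
 * state, BookE or BookS SREGS and ONE_REG SPRs from the kernel back
 * into CPUPPCState.
 */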
760 int kvm_arch_get_registers(CPUState *cs)
762 PowerPCCPU *cpu = POWERPC_CPU(cs);
763 CPUPPCState *env = &cpu->env;
764 struct kvm_regs regs;
765 struct kvm_sregs sregs;
766 uint32_t cr;
767 int i, ret;
769 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
770 if (ret < 0)
771 return ret;
773 cr = regs.cr;
774 for (i = 7; i >= 0; i--) {
775 env->crf[i] = cr & 15;
776 cr >>= 4;
779 env->ctr = regs.ctr;
780 env->lr = regs.lr;
781 cpu_write_xer(env, regs.xer);
782 env->msr = regs.msr;
783 env->nip = regs.pc;
785 env->spr[SPR_SRR0] = regs.srr0;
786 env->spr[SPR_SRR1] = regs.srr1;
788 env->spr[SPR_SPRG0] = regs.sprg0;
789 env->spr[SPR_SPRG1] = regs.sprg1;
790 env->spr[SPR_SPRG2] = regs.sprg2;
791 env->spr[SPR_SPRG3] = regs.sprg3;
792 env->spr[SPR_SPRG4] = regs.sprg4;
793 env->spr[SPR_SPRG5] = regs.sprg5;
794 env->spr[SPR_SPRG6] = regs.sprg6;
795 env->spr[SPR_SPRG7] = regs.sprg7;
797 env->spr[SPR_BOOKE_PID] = regs.pid;
799 for (i = 0; i < 32; i++)
800 env->gpr[i] = regs.gpr[i];
802 kvm_get_fp(cs);
804 if (cap_booke_sregs) {
805 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
806 if (ret < 0) {
807 return ret;
810 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
811 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
812 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
813 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
814 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
815 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
816 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
817 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
818 env->spr[SPR_DECR] = sregs.u.e.dec;
819 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
820 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
821 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
824 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
825 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
826 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
827 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
828 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
829 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
832 if (sregs.u.e.features & KVM_SREGS_E_64) {
833 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
836 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
837 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
840 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
841 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
842 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
843 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
844 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
845 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
846 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
847 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
848 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
849 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
850 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
851 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
852 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
853 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
854 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
855 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
856 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
858 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
859 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
860 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
861 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
864 if (sregs.u.e.features & KVM_SREGS_E_PM) {
865 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
868 if (sregs.u.e.features & KVM_SREGS_E_PC) {
869 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
870 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
874 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
875 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
876 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
877 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
878 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
879 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
880 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
881 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
882 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
883 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
884 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
887 if (sregs.u.e.features & KVM_SREGS_EXP) {
888 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
891 if (sregs.u.e.features & KVM_SREGS_E_PD) {
892 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
893 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
896 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
897 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
898 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
899 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
901 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
902 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
903 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
908 if (cap_segstate) {
909 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
910 if (ret < 0) {
911 return ret;
914 ppc_store_sdr1(env, sregs.u.s.sdr1);
916 /* Sync SLB */
917 #ifdef TARGET_PPC64
918 for (i = 0; i < 64; i++) {
919 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
920 sregs.u.s.ppc64.slb[i].slbv);
922 #endif
924 /* Sync SRs */
925 for (i = 0; i < 16; i++) {
926 env->sr[i] = sregs.u.s.ppc32.sr[i];
929 /* Sync BATs */
930 for (i = 0; i < 8; i++) {
931 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
932 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
933 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
934 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
938 if (cap_hior) {
939 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
942 if (cap_one_reg) {
943 int i;
945 /* We deliberately ignore errors here: for kernels which have
946 * the ONE_REG calls but don't support the specific
947 * registers, there's a reasonable chance things will still
948 * work, at least until we try to migrate. */
949 for (i = 0; i < 1024; i++) {
950 uint64_t id = env->spr_cb[i].one_reg_id;
952 if (id != 0) {
953 kvm_get_one_spr(cs, id, i);
958 return 0;
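/* Raise or clear the external interrupt line of a vCPU using the
 * level-triggered KVM interface; a no-op if the irq is not the external
 * interrupt or the required capabilities are missing.
 */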
961 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
963 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
965 if (irq != PPC_INTERRUPT_EXT) {
966 return 0;
969 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
970 return 0;
973 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
975 return 0;
978 #if defined(TARGET_PPCEMB)
979 #define PPC_INPUT_INT PPC40x_INPUT_INT
980 #elif defined(TARGET_PPC64)
981 #define PPC_INPUT_INT PPC970_INPUT_INT
982 #else
983 #define PPC_INPUT_INT PPC6xx_INPUT_INT
984 #endif
986 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
988 PowerPCCPU *cpu = POWERPC_CPU(cs);
989 CPUPPCState *env = &cpu->env;
990 int r;
991 unsigned irq;
993 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
994 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
995 if (!cap_interrupt_level &&
996 run->ready_for_interrupt_injection &&
997 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
998 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1000 /* For now KVM disregards the 'irq' argument. However, in the
1001 * future KVM could cache it in-kernel to avoid a heavyweight exit
1002 * when reading the UIC.
1004 irq = KVM_INTERRUPT_SET;
1006 dprintf("injected interrupt %d\n", irq);
1007 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1008 if (r < 0) {
1009 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1012 /* Always wake up soon in case the interrupt was level based */
1013 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1014 (get_ticks_per_sec() / 50));
1017 /* We don't know if there are more interrupts pending after this. However,
1018 * the guest will return to userspace in the course of handling this one
1019 * anyway, so we will get a chance to deliver the rest. */
1022 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1026 int kvm_arch_process_async_events(CPUState *cs)
1028 return cs->halted;
1031 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1033 CPUState *cs = CPU(cpu);
1034 CPUPPCState *env = &cpu->env;
1036 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1037 cs->halted = 1;
1038 env->exception_index = EXCP_HLT;
1041 return 0;
1044 /* map dcr access to existing qemu dcr emulation */
1045 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1047 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1048 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1050 return 0;
1053 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1055 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1056 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1058 return 0;
1061 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1063 PowerPCCPU *cpu = POWERPC_CPU(cs);
1064 CPUPPCState *env = &cpu->env;
1065 int ret;
1067 switch (run->exit_reason) {
1068 case KVM_EXIT_DCR:
1069 if (run->dcr.is_write) {
1070 dprintf("handle dcr write\n");
1071 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1072 } else {
1073 dprintf("handle dcr read\n");
1074 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1076 break;
1077 case KVM_EXIT_HLT:
1078 dprintf("handle halt\n");
1079 ret = kvmppc_handle_halt(cpu);
1080 break;
1081 #if defined(TARGET_PPC64)
1082 case KVM_EXIT_PAPR_HCALL:
1083 dprintf("handle PAPR hypercall\n");
1084 run->papr_hcall.ret = spapr_hypercall(cpu,
1085 run->papr_hcall.nr,
1086 run->papr_hcall.args);
1087 ret = 0;
1088 break;
1089 #endif
1090 case KVM_EXIT_EPR:
1091 dprintf("handle epr\n");
1092 run->epr.epr = ldl_phys(env->mpic_iack);
1093 ret = 0;
1094 break;
1095 default:
1096 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1097 ret = -1;
1098 break;
1101 return ret;
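/* Scan /proc/cpuinfo for a line starting with 'field' and copy it into
 * 'value'; used below to read the host timebase frequency.
 */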
1104 static int read_cpuinfo(const char *field, char *value, int len)
1106 FILE *f;
1107 int ret = -1;
1108 int field_len = strlen(field);
1109 char line[512];
1111 f = fopen("/proc/cpuinfo", "r");
1112 if (!f) {
1113 return -1;
1116 do {
1117 if (!fgets(line, sizeof(line), f)) {
1118 break;
1120 if (!strncmp(line, field, field_len)) {
1121 pstrcpy(value, len, line);
1122 ret = 0;
1123 break;
1125 } while (*line);
1127 fclose(f);
1129 return ret;
1132 uint32_t kvmppc_get_tbfreq(void)
1134 char line[512];
1135 char *ns;
1136 uint32_t retval = get_ticks_per_sec();
1138 if (read_cpuinfo("timebase", line, sizeof(line))) {
1139 return retval;
1142 if (!(ns = strchr(line, ':'))) {
1143 return retval;
1146 ns++;
1148 retval = atoi(ns);
1149 return retval;
1152 /* Try to find a device tree node for a CPU with clock-frequency property */
1153 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1155 struct dirent *dirp;
1156 DIR *dp;
1158 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1159 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1160 return -1;
1163 buf[0] = '\0';
1164 while ((dirp = readdir(dp)) != NULL) {
1165 FILE *f;
1166 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1167 dirp->d_name);
1168 f = fopen(buf, "r");
1169 if (f) {
1170 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1171 fclose(f);
1172 break;
1174 buf[0] = '\0';
1176 closedir(dp);
1177 if (buf[0] == '\0') {
1178 printf("Unknown host!\n");
1179 return -1;
1182 return 0;
1185 /* Read a CPU node property from the host device tree that's a single
1186 * integer (32-bit or 64-bit). Returns -1 if the property can't be
1187 * found or opened, and 0 if its length is neither 4 nor 8 bytes. */
1189 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1191 char buf[PATH_MAX];
1192 union {
1193 uint32_t v32;
1194 uint64_t v64;
1195 } u;
1196 FILE *f;
1197 int len;
1199 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1200 return -1;
1203 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1204 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1206 f = fopen(buf, "rb");
1207 if (!f) {
1208 return -1;
1211 len = fread(&u, 1, sizeof(u), f);
1212 fclose(f);
1213 switch (len) {
1214 case 4:
1215 /* property is a 32-bit quantity */
1216 return be32_to_cpu(u.v32);
1217 case 8:
1218 return be64_to_cpu(u.v64);
1221 return 0;
1224 uint64_t kvmppc_get_clockfreq(void)
1226 return kvmppc_read_int_cpu_dt("clock-frequency");
1229 uint32_t kvmppc_get_vmx(void)
1231 return kvmppc_read_int_cpu_dt("ibm,vmx");
1234 uint32_t kvmppc_get_dfp(void)
1236 return kvmppc_read_int_cpu_dt("ibm,dfp");
1239 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1241 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1242 CPUState *cs = CPU(cpu);
1244 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1245 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1246 return 0;
1249 return 1;
1252 int kvmppc_get_hasidle(CPUPPCState *env)
1254 struct kvm_ppc_pvinfo pvinfo;
1256 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1257 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1258 return 1;
1261 return 0;
1264 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1266 uint32_t *hc = (uint32_t*)buf;
1267 struct kvm_ppc_pvinfo pvinfo;
1269 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1270 memcpy(buf, pvinfo.hcall, buf_len);
1271 return 0;
1275 * Fallback to always fail hypercalls:
1277 * li r3, -1
1278 * nop
1279 * nop
1280 * nop
1283 hc[0] = 0x3860ffff;
1284 hc[1] = 0x60000000;
1285 hc[2] = 0x60000000;
1286 hc[3] = 0x60000000;
1288 return 0;
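/* Enable in-kernel PAPR mode for this vCPU so KVM handles sPAPR
 * hypercalls directly; aborts if the running kernel lacks
 * KVM_CAP_PPC_PAPR.
 */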
1291 void kvmppc_set_papr(PowerPCCPU *cpu)
1293 CPUPPCState *env = &cpu->env;
1294 CPUState *cs = CPU(cpu);
1295 struct kvm_enable_cap cap = {};
1296 int ret;
1298 cap.cap = KVM_CAP_PPC_PAPR;
1299 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1301 if (ret) {
1302 cpu_abort(env, "This KVM version does not support PAPR\n");
1306 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1308 CPUPPCState *env = &cpu->env;
1309 CPUState *cs = CPU(cpu);
1310 struct kvm_enable_cap cap = {};
1311 int ret;
1313 cap.cap = KVM_CAP_PPC_EPR;
1314 cap.args[0] = mpic_proxy;
1315 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1317 if (ret && mpic_proxy) {
1318 cpu_abort(env, "This KVM version does not support EPR\n");
1322 int kvmppc_smt_threads(void)
1324 return cap_ppc_smt ? cap_ppc_smt : 1;
1327 #ifdef TARGET_PPC64
1328 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1330 void *rma;
1331 off_t size;
1332 int fd;
1333 struct kvm_allocate_rma ret;
1334 MemoryRegion *rma_region;
1336 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1337 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1338 * not necessary on this hardware
1339 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1341 * FIXME: We should allow the user to force contiguous RMA
1342 * allocation in the cap_ppc_rma==1 case.
1344 if (cap_ppc_rma < 2) {
1345 return 0;
1348 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1349 if (fd < 0) {
1350 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1351 strerror(errno));
1352 return -1;
1355 size = MIN(ret.rma_size, 256ul << 20);
1357 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1358 if (rma == MAP_FAILED) {
1359 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1360 return -1;
1363 rma_region = g_new(MemoryRegion, 1);
1364 memory_region_init_ram_ptr(rma_region, name, size, rma);
1365 vmstate_register_ram_global(rma_region);
1366 memory_region_add_subregion(sysmem, 0, rma_region);
1368 return size;
1371 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1373 if (cap_ppc_rma >= 2) {
1374 return current_size;
1376 return MIN(current_size,
1377 getrampagesize() << (hash_shift - 7));
1379 #endif
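/* Ask KVM to create an in-kernel TCE (DMA window) table for the given
 * LIOBN and mmap() it so QEMU and the kernel share the same backing.
 * Returns NULL (and *pfd = -1) if the capability is absent or the
 * ioctl/mmap fails.
 */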
1381 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1383 struct kvm_create_spapr_tce args = {
1384 .liobn = liobn,
1385 .window_size = window_size,
1387 long len;
1388 int fd;
1389 void *table;
1391 /* Must set fd to -1 so we don't try to munmap when called for
1392 * destroying the table, which the upper layers -will- do
1394 *pfd = -1;
1395 if (!cap_spapr_tce) {
1396 return NULL;
1399 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1400 if (fd < 0) {
1401 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1402 liobn);
1403 return NULL;
1406 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1407 /* FIXME: round this up to page size */
1409 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1410 if (table == MAP_FAILED) {
1411 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1412 liobn);
1413 close(fd);
1414 return NULL;
1417 *pfd = fd;
1418 return table;
1421 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1423 long len;
1425 if (fd < 0) {
1426 return -1;
1429 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1430 if ((munmap(table, len) < 0) ||
1431 (close(fd) < 0)) {
1432 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1433 strerror(errno));
1434 /* Leak the table */
1437 return 0;
1440 int kvmppc_reset_htab(int shift_hint)
1442 uint32_t shift = shift_hint;
1444 if (!kvm_enabled()) {
1445 /* Full emulation, tell caller to allocate htab itself */
1446 return 0;
1448 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1449 int ret;
1450 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1451 if (ret == -ENOTTY) {
1452 /* At least some versions of PR KVM advertise the
1453 * capability, but don't implement the ioctl(). Oops.
1454 * Return 0 so that we allocate the htab in qemu, as is
1455 * correct for PR. */
1456 return 0;
1457 } else if (ret < 0) {
1458 return ret;
1460 return shift;
1463 /* We have a kernel that predates the htab reset calls. For PR
1464 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1465 * this era, it has allocated a 16MB fixed size hash table
1466 * already. Kernels of this era have the GET_PVINFO capability
1467 * only on PR, so we use this hack to determine the right
1468 * answer */
1469 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1470 /* PR - tell caller to allocate htab */
1471 return 0;
1472 } else {
1473 /* HV - assume 16MB kernel allocated htab */
1474 return 24;
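/* Read the host Processor Version Register directly; used below to pick
 * the CPU class that the "host" CPU type is derived from.
 */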
1478 static inline uint32_t mfpvr(void)
1480 uint32_t pvr;
1482 asm ("mfpvr %0"
1483 : "=r"(pvr));
1484 return pvr;
1487 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1489 if (on) {
1490 *word |= flags;
1491 } else {
1492 *word &= ~flags;
1496 static void kvmppc_host_cpu_initfn(Object *obj)
1498 assert(kvm_enabled());
1501 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1503 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1504 uint32_t vmx = kvmppc_get_vmx();
1505 uint32_t dfp = kvmppc_get_dfp();
1507 /* Now fix up the class with information we can query from the host */
1509 if (vmx != -1) {
1510 /* Only override when we know what the host supports */
1511 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1512 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1514 if (dfp != -1) {
1515 /* Only override when we know what the host supports */
1516 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1520 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1522 CPUState *cs = CPU(cpu);
1523 int smt;
1525 /* Adjust cpu index for SMT */
1526 smt = kvmppc_smt_threads();
1527 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1528 + (cs->cpu_index % smp_threads);
1530 return 0;
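/* Register a "host" CPU type whose class matches the host PVR, so that
 * "-cpu host" resolves to the real processor when running under KVM.
 */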
1533 static int kvm_ppc_register_host_cpu_type(void)
1535 TypeInfo type_info = {
1536 .name = TYPE_HOST_POWERPC_CPU,
1537 .instance_init = kvmppc_host_cpu_initfn,
1538 .class_init = kvmppc_host_cpu_class_init,
1540 uint32_t host_pvr = mfpvr();
1541 PowerPCCPUClass *pvr_pcc;
1543 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1544 if (pvr_pcc == NULL) {
1545 return -1;
1547 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1548 type_register(&type_info);
1549 return 0;
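/* Generic hooks PPC does not specialise: always stop on emulation errors
 * and report SIGBUS events as unhandled.
 */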
1553 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1555 return true;
1558 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1560 return 1;
1563 int kvm_arch_on_sigbus(int code, void *addr)
1565 return 1;