/* qemu/ar7.git: target-ppc/kvm.c, as of "kvm: add KVM_IRQFD_FLAG_RESAMPLE support" */

/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "sysemu/watchdog.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

#define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
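
/* Push the guest's PVR to KVM through the SREGS interface.  On BookE the
 * host PVR is used as-is, so there is nothing to synchronize. */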
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}

/* Set up a shared TLB array with KVM */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    struct kvm_enable_cap encap = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    encap.cap = KVM_CAP_SW_TLB;
    encap.args[0] = (uintptr_t)&cfg;

    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}

#if defined(TARGET_PPC64)
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 */
        if (env->mmu_model == POWERPC_MMU_2_06) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}

static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}

static long getrampagesize(void)
{
    struct statfs fs;
    int ret;

    if (!mem_path) {
        /* guest RAM is backed by normal anonymous pages */
        return getpagesize();
    }

    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC 0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}

static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    return (1ul << shift) <= rampgsize;
}
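
/* Filter the segment/page-size combinations reported by KVM (or the
 * fallback above) down to those the RAM backing page size can support,
 * and store the result in the QEMU-side CPU state. */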
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
        env->mmu_model |= POWERPC_MMU_1TSEG;
    } else {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */

unsigned long kvm_arch_vcpu_id(CPUState *cpu)
{
    return cpu->cpu_index;
}

int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    return ret;
}

void kvm_arch_reset_vcpu(CPUState *cpu)
{
}
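
/* Flush QEMU's software TLB view back into KVM by marking every entry
 * dirty in a single KVM_DIRTY_TLB call. */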
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
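
/* Read or write a single SPR through the KVM_GET_ONE_REG/KVM_SET_ONE_REG
 * interface, handling both 32-bit and 64-bit register encodings. */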
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
                spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}

static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
                spr, strerror(errno));
    }
}
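
/* Synchronize floating point, VSX and Altivec state with KVM via ONE_REG;
 * each FPR is transferred as the first doubleword of the matching VSR. */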
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}

static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                dprintf("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
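
/* Fetch/restore the PAPR Virtual Processor Area registrations (master VPA,
 * SLB shadow buffer and dispatch trace log) through ONE_REG. */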
#if defined(TARGET_PPC64)
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}

static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered. That means when restoring state, if a VPA *is*
     * registered, we need to set that up first. If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        dprintf("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            dprintf("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
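
/* Push all QEMU-side CPU state (GPRs, CR, MSR, SPRs, FP/VMX, segment and
 * BAT registers, and optionally the PAPR VPA state) into KVM. */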
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0; i < 32; i++)
        regs.gpr[i] = env->gpr[i];

    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0)
        return ret;

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                dprintf("Warning: Unable to set VPA information to KVM\n");
            }
        }
#endif /* TARGET_PPC64 */
    }

    return ret;
}
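
/* Pull the full CPU state back out of KVM into the QEMU-side structures,
 * including the BookE or Book3S specific SREGS blocks where available. */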
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    struct kvm_sregs sregs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0)
        return ret;

    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0; i < 32; i++)
        env->gpr[i] = regs.gpr[i];

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        if (sregs.u.e.features & KVM_SREGS_E_BASE) {
            env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
            env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
            env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
            env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
            env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
            env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
            env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
            env->spr[SPR_DECR] = sregs.u.e.dec;
            env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
            env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
            env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
            env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
            env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
            env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
            env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
            env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_64) {
            env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
            env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
        }

        if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
            env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
            env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
            env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
            env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
            env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
            env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
            env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
            env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
            env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
            env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
            env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
            env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
            env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
            env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
            env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
            env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];

            if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
                env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
                env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PM) {
                env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            }

            if (sregs.u.e.features & KVM_SREGS_E_PC) {
                env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
                env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            }
        }

        if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
            env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
            env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
            env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
            env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
            env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
            env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
            env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
            env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
            env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
            env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
        }

        if (sregs.u.e.features & KVM_SREGS_EXP) {
            env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
        }

        if (sregs.u.e.features & KVM_SREGS_E_PD) {
            env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
            env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
        }

        if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
            env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
            env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
            env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

            if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
                env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
                env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
            }
        }
    }

    if (cap_segstate) {
        ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
        if (ret < 0) {
            return ret;
        }

        ppc_store_sdr1(env, sregs.u.s.sdr1);

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < 64; i++) {
            ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
                               sregs.u.s.ppc64.slb[i].slbv);
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            env->sr[i] = sregs.u.s.ppc32.sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
            env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
            env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
            env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                dprintf("Warning: Unable to get VPA information from KVM\n");
            }
        }
#endif
    }

    return 0;
}
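
/* Raise or clear the external interrupt line for a vCPU through the
 * KVM_INTERRUPT ioctl, using the level-triggered variant where available. */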
int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
{
    unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;

    if (irq != PPC_INTERRUPT_EXT) {
        return 0;
    }

    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
        return 0;
    }

    kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);

    return 0;
}

#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif

void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        dprintf("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
                       (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */
}

void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}

int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
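
/* Handle a KVM_EXIT_HLT: put the vCPU to sleep unless a hard interrupt is
 * already pending or external interrupts are disabled (MSR[EE] clear). */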
static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        env->exception_index = EXCP_HLT;
    }

    return 0;
}

/* map dcr access to existing qemu dcr emulation */
static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
{
    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);

    return 0;
}

static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
{
    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
        fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);

    return 0;
}

int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            dprintf("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            dprintf("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        dprintf("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        dprintf("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        dprintf("handle epr\n");
        run->epr.epr = ldl_phys(env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        dprintf("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
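
/* BookE watchdog plumbing: set/clear TSR bits and write TCR through the
 * ONE_REG interface, and enable the in-kernel watchdog capability. */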
int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_OR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
{
    CPUState *cs = CPU(cpu);
    uint32_t bits = tsr_bits;
    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_CLEAR_TSR,
        .addr = (uintptr_t) &bits,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_set_tcr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    uint32_t tcr = env->spr[SPR_BOOKE_TCR];

    struct kvm_one_reg reg = {
        .id = KVM_REG_PPC_TCR,
        .addr = (uintptr_t) &tcr,
    };

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap encap = {};
    int ret;

    if (!kvm_enabled()) {
        return -1;
    }

    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog");
        return -1;
    }

    encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    return ret;
}

static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
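
/* Return the host timebase frequency, parsed from the "timebase" line in
 * /proc/cpuinfo, falling back to QEMU's default tick rate. */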
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}

/* Try to find a device tree node for a CPU with clock-frequency property */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}

/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX];
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    strncat(buf, "/", sizeof(buf) - strlen(buf));
    strncat(buf, propname, sizeof(buf) - strlen(buf));

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}

uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}

static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}

int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}

int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;
    hc[1] = 0x60000000;
    hc[2] = 0x60000000;
    hc[3] = 0x60000000;

    return 0;
}

void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(env, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}

void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(env, "This KVM version does not support EPR\n");
    }
}

int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}

#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}

uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif
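
/* Create an in-kernel TCE (DMA translation) table for a sPAPR LIOBN and
 * mmap it into QEMU's address space; returns the table or NULL on error. */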
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}

int kvmppc_fixup_cpu(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int smt;

    /* Adjust cpu index for SMT */
    smt = kvmppc_smt_threads();
    cs->cpu_index = (cs->cpu_index / smp_threads) * smt
        + (cs->cpu_index % smp_threads);

    return 0;
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}
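
/* Register a "host" CPU class that inherits from the class matching the
 * host PVR, so "-cpu host" picks up the real processor's features. */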
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}