MAINTAINERS: mark megasas as maintained
[qemu/ar7.git] / target-ppc / kvm.c
blob9974b10ccb336dd248f968550d92351f3baf746e
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
39 #include "trace.h"
41 //#define DEBUG_KVM
43 #ifdef DEBUG_KVM
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #else
47 #define DPRINTF(fmt, ...) \
48 do { } while (0)
49 #endif
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
54 KVM_CAP_LAST_INFO
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
64 static int cap_hior;
65 static int cap_one_reg;
66 static int cap_epr;
67 static int cap_ppc_watchdog;
68 static int cap_papr;
69 static int cap_htab_fd;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can assure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
80 static QEMUTimer *idle_timer;
82 static void kvm_kick_cpu(void *opaque)
84 PowerPCCPU *cpu = opaque;
86 qemu_cpu_kick(CPU(cpu));
89 static int kvm_ppc_register_host_cpu_type(void);
91 int kvm_arch_init(KVMState *s)
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
108 if (!cap_interrupt_level) {
109 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
115 return 0;
118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
120 CPUPPCState *cenv = &cpu->env;
121 CPUState *cs = CPU(cpu);
122 struct kvm_sregs sregs;
123 int ret;
125 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users that they can run
129 BookE guests on BookS. Let's hope nobody dares enough :) */
130 return 0;
131 } else {
132 if (!cap_segstate) {
133 fprintf(stderr, "kvm error: missing PVR setting capability\n");
134 return -ENOSYS;
138 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
139 if (ret) {
140 return ret;
143 sregs.pvr = cenv->spr[SPR_PVR];
144 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
147 /* Set up a shared TLB array with KVM */
148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
150 CPUPPCState *env = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_book3e_206_tlb_params params = {};
153 struct kvm_config_tlb cfg = {};
154 struct kvm_enable_cap encap = {};
155 unsigned int entries = 0;
156 int ret, i;
158 if (!kvm_enabled() ||
159 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
160 return 0;
163 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
165 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
166 params.tlb_sizes[i] = booke206_tlb_size(env, i);
167 params.tlb_ways[i] = booke206_tlb_ways(env, i);
168 entries += params.tlb_sizes[i];
171 assert(entries == env->nb_tlb);
172 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
174 env->tlb_dirty = true;
176 cfg.array = (uintptr_t)env->tlb.tlbm;
177 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
178 cfg.params = (uintptr_t)&params;
179 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
181 encap.cap = KVM_CAP_SW_TLB;
182 encap.args[0] = (uintptr_t)&cfg;
184 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
185 if (ret < 0) {
186 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
187 __func__, strerror(-ret));
188 return ret;
191 env->kvm_sw_tlb = true;
192 return 0;
196 #if defined(TARGET_PPC64)
197 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
198 struct kvm_ppc_smmu_info *info)
200 CPUPPCState *env = &cpu->env;
201 CPUState *cs = CPU(cpu);
203 memset(info, 0, sizeof(*info));
205 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
206 * need to "guess" what the supported page sizes are.
208 * For that to work we make a few assumptions:
210 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
211 * KVM which only supports 4K and 16M pages, but supports them
212 * regardless of the backing store characteritics. We also don't
213 * support 1T segments.
215 * This is safe as if HV KVM ever supports that capability or PR
216 * KVM grows supports for more page/segment sizes, those versions
217 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
218 * will not hit this fallback
220 * - Else we are running HV KVM. This means we only support page
221 * sizes that fit in the backing store. Additionally we only
222 * advertize 64K pages if the processor is ARCH 2.06 and we assume
223 * P7 encodings for the SLB and hash table. Here too, we assume
224 * support for any newer processor will mean a kernel that
225 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
226 * this fallback.
228 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
229 /* No flags */
230 info->flags = 0;
231 info->slb_size = 64;
233 /* Standard 4k base page size segment */
234 info->sps[0].page_shift = 12;
235 info->sps[0].slb_enc = 0;
236 info->sps[0].enc[0].page_shift = 12;
237 info->sps[0].enc[0].pte_enc = 0;
239 /* Standard 16M large page size segment */
240 info->sps[1].page_shift = 24;
241 info->sps[1].slb_enc = SLB_VSID_L;
242 info->sps[1].enc[0].page_shift = 24;
243 info->sps[1].enc[0].pte_enc = 0;
244 } else {
245 int i = 0;
247 /* HV KVM has backing store size restrictions */
248 info->flags = KVM_PPC_PAGE_SIZES_REAL;
250 if (env->mmu_model & POWERPC_MMU_1TSEG) {
251 info->flags |= KVM_PPC_1T_SEGMENTS;
254 if (env->mmu_model == POWERPC_MMU_2_06) {
255 info->slb_size = 32;
256 } else {
257 info->slb_size = 64;
260 /* Standard 4k base page size segment */
261 info->sps[i].page_shift = 12;
262 info->sps[i].slb_enc = 0;
263 info->sps[i].enc[0].page_shift = 12;
264 info->sps[i].enc[0].pte_enc = 0;
265 i++;
267 /* 64K on MMU 2.06 */
268 if (env->mmu_model == POWERPC_MMU_2_06) {
269 info->sps[i].page_shift = 16;
270 info->sps[i].slb_enc = 0x110;
271 info->sps[i].enc[0].page_shift = 16;
272 info->sps[i].enc[0].pte_enc = 1;
273 i++;
276 /* Standard 16M large page size segment */
277 info->sps[i].page_shift = 24;
278 info->sps[i].slb_enc = SLB_VSID_L;
279 info->sps[i].enc[0].page_shift = 24;
280 info->sps[i].enc[0].pte_enc = 0;
284 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
286 CPUState *cs = CPU(cpu);
287 int ret;
289 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
290 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
291 if (ret == 0) {
292 return;
296 kvm_get_fallback_smmu_info(cpu, info);
299 static long getrampagesize(void)
301 struct statfs fs;
302 int ret;
304 if (!mem_path) {
305 /* guest RAM is backed by normal anonymous pages */
306 return getpagesize();
309 do {
310 ret = statfs(mem_path, &fs);
311 } while (ret != 0 && errno == EINTR);
313 if (ret != 0) {
314 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
315 strerror(errno));
316 exit(1);
319 #define HUGETLBFS_MAGIC 0x958458f6
321 if (fs.f_type != HUGETLBFS_MAGIC) {
322 /* Explicit mempath, but it's ordinary pages */
323 return getpagesize();
326 /* It's hugepage, return the huge page size */
327 return fs.f_bsize;
330 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
332 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
333 return true;
336 return (1ul << shift) <= rampgsize;
339 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
341 static struct kvm_ppc_smmu_info smmu_info;
342 static bool has_smmu_info;
343 CPUPPCState *env = &cpu->env;
344 long rampagesize;
345 int iq, ik, jq, jk;
347 /* We only handle page sizes for 64-bit server guests for now */
348 if (!(env->mmu_model & POWERPC_MMU_64)) {
349 return;
352 /* Collect MMU info from kernel if not already */
353 if (!has_smmu_info) {
354 kvm_get_smmu_info(cpu, &smmu_info);
355 has_smmu_info = true;
358 rampagesize = getrampagesize();
360 /* Convert to QEMU form */
361 memset(&env->sps, 0, sizeof(env->sps));
363 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
364 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
365 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
367 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
368 ksps->page_shift)) {
369 continue;
371 qsps->page_shift = ksps->page_shift;
372 qsps->slb_enc = ksps->slb_enc;
373 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
374 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
375 ksps->enc[jk].page_shift)) {
376 continue;
378 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
379 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
380 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
381 break;
384 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
385 break;
388 env->slb_nr = smmu_info.slb_size;
389 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
390 env->mmu_model |= POWERPC_MMU_1TSEG;
391 } else {
392 env->mmu_model &= ~POWERPC_MMU_1TSEG;
395 #else /* defined (TARGET_PPC64) */
397 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
401 #endif /* !defined (TARGET_PPC64) */
403 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
405 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
408 int kvm_arch_init_vcpu(CPUState *cs)
410 PowerPCCPU *cpu = POWERPC_CPU(cs);
411 CPUPPCState *cenv = &cpu->env;
412 int ret;
414 /* Gather server mmu info from KVM and update the CPU state */
415 kvm_fixup_page_sizes(cpu);
417 /* Synchronize sregs with kvm */
418 ret = kvm_arch_sync_sregs(cpu);
419 if (ret) {
420 return ret;
423 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
425 /* Some targets support access to KVM's guest TLB. */
426 switch (cenv->mmu_model) {
427 case POWERPC_MMU_BOOKE206:
428 ret = kvm_booke206_tlb_init(cpu);
429 break;
430 default:
431 break;
434 return ret;
437 void kvm_arch_reset_vcpu(CPUState *cpu)
441 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
443 CPUPPCState *env = &cpu->env;
444 CPUState *cs = CPU(cpu);
445 struct kvm_dirty_tlb dirty_tlb;
446 unsigned char *bitmap;
447 int ret;
449 if (!env->kvm_sw_tlb) {
450 return;
453 bitmap = g_malloc((env->nb_tlb + 7) / 8);
454 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
456 dirty_tlb.bitmap = (uintptr_t)bitmap;
457 dirty_tlb.num_dirty = env->nb_tlb;
459 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
460 if (ret) {
461 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
462 __func__, strerror(-ret));
465 g_free(bitmap);
468 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
470 PowerPCCPU *cpu = POWERPC_CPU(cs);
471 CPUPPCState *env = &cpu->env;
472 union {
473 uint32_t u32;
474 uint64_t u64;
475 } val;
476 struct kvm_one_reg reg = {
477 .id = id,
478 .addr = (uintptr_t) &val,
480 int ret;
482 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
483 if (ret != 0) {
484 trace_kvm_failed_spr_get(spr, strerror(errno));
485 } else {
486 switch (id & KVM_REG_SIZE_MASK) {
487 case KVM_REG_SIZE_U32:
488 env->spr[spr] = val.u32;
489 break;
491 case KVM_REG_SIZE_U64:
492 env->spr[spr] = val.u64;
493 break;
495 default:
496 /* Don't handle this size yet */
497 abort();
502 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
504 PowerPCCPU *cpu = POWERPC_CPU(cs);
505 CPUPPCState *env = &cpu->env;
506 union {
507 uint32_t u32;
508 uint64_t u64;
509 } val;
510 struct kvm_one_reg reg = {
511 .id = id,
512 .addr = (uintptr_t) &val,
514 int ret;
516 switch (id & KVM_REG_SIZE_MASK) {
517 case KVM_REG_SIZE_U32:
518 val.u32 = env->spr[spr];
519 break;
521 case KVM_REG_SIZE_U64:
522 val.u64 = env->spr[spr];
523 break;
525 default:
526 /* Don't handle this size yet */
527 abort();
530 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
531 if (ret != 0) {
532 trace_kvm_failed_spr_set(spr, strerror(errno));
536 static int kvm_put_fp(CPUState *cs)
538 PowerPCCPU *cpu = POWERPC_CPU(cs);
539 CPUPPCState *env = &cpu->env;
540 struct kvm_one_reg reg;
541 int i;
542 int ret;
544 if (env->insns_flags & PPC_FLOAT) {
545 uint64_t fpscr = env->fpscr;
546 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
548 reg.id = KVM_REG_PPC_FPSCR;
549 reg.addr = (uintptr_t)&fpscr;
550 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
551 if (ret < 0) {
552 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
553 return ret;
556 for (i = 0; i < 32; i++) {
557 uint64_t vsr[2];
559 vsr[0] = float64_val(env->fpr[i]);
560 vsr[1] = env->vsr[i];
561 reg.addr = (uintptr_t) &vsr;
562 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
564 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
565 if (ret < 0) {
566 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
567 i, strerror(errno));
568 return ret;
573 if (env->insns_flags & PPC_ALTIVEC) {
574 reg.id = KVM_REG_PPC_VSCR;
575 reg.addr = (uintptr_t)&env->vscr;
576 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
577 if (ret < 0) {
578 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
579 return ret;
582 for (i = 0; i < 32; i++) {
583 reg.id = KVM_REG_PPC_VR(i);
584 reg.addr = (uintptr_t)&env->avr[i];
585 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
586 if (ret < 0) {
587 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
588 return ret;
593 return 0;
596 static int kvm_get_fp(CPUState *cs)
598 PowerPCCPU *cpu = POWERPC_CPU(cs);
599 CPUPPCState *env = &cpu->env;
600 struct kvm_one_reg reg;
601 int i;
602 int ret;
604 if (env->insns_flags & PPC_FLOAT) {
605 uint64_t fpscr;
606 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
608 reg.id = KVM_REG_PPC_FPSCR;
609 reg.addr = (uintptr_t)&fpscr;
610 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
611 if (ret < 0) {
612 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
613 return ret;
614 } else {
615 env->fpscr = fpscr;
618 for (i = 0; i < 32; i++) {
619 uint64_t vsr[2];
621 reg.addr = (uintptr_t) &vsr;
622 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
624 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
625 if (ret < 0) {
626 DPRINTF("Unable to get %s%d from KVM: %s\n",
627 vsx ? "VSR" : "FPR", i, strerror(errno));
628 return ret;
629 } else {
630 env->fpr[i] = vsr[0];
631 if (vsx) {
632 env->vsr[i] = vsr[1];
638 if (env->insns_flags & PPC_ALTIVEC) {
639 reg.id = KVM_REG_PPC_VSCR;
640 reg.addr = (uintptr_t)&env->vscr;
641 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
642 if (ret < 0) {
643 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
644 return ret;
647 for (i = 0; i < 32; i++) {
648 reg.id = KVM_REG_PPC_VR(i);
649 reg.addr = (uintptr_t)&env->avr[i];
650 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
651 if (ret < 0) {
652 DPRINTF("Unable to get VR%d from KVM: %s\n",
653 i, strerror(errno));
654 return ret;
659 return 0;
662 #if defined(TARGET_PPC64)
663 static int kvm_get_vpa(CPUState *cs)
665 PowerPCCPU *cpu = POWERPC_CPU(cs);
666 CPUPPCState *env = &cpu->env;
667 struct kvm_one_reg reg;
668 int ret;
670 reg.id = KVM_REG_PPC_VPA_ADDR;
671 reg.addr = (uintptr_t)&env->vpa_addr;
672 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
673 if (ret < 0) {
674 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
675 return ret;
678 assert((uintptr_t)&env->slb_shadow_size
679 == ((uintptr_t)&env->slb_shadow_addr + 8));
680 reg.id = KVM_REG_PPC_VPA_SLB;
681 reg.addr = (uintptr_t)&env->slb_shadow_addr;
682 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
683 if (ret < 0) {
684 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
685 strerror(errno));
686 return ret;
689 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
690 reg.id = KVM_REG_PPC_VPA_DTL;
691 reg.addr = (uintptr_t)&env->dtl_addr;
692 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
693 if (ret < 0) {
694 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
695 strerror(errno));
696 return ret;
699 return 0;
702 static int kvm_put_vpa(CPUState *cs)
704 PowerPCCPU *cpu = POWERPC_CPU(cs);
705 CPUPPCState *env = &cpu->env;
706 struct kvm_one_reg reg;
707 int ret;
709 /* SLB shadow or DTL can't be registered unless a master VPA is
710 * registered. That means when restoring state, if a VPA *is*
711 * registered, we need to set that up first. If not, we need to
712 * deregister the others before deregistering the master VPA */
713 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
715 if (env->vpa_addr) {
716 reg.id = KVM_REG_PPC_VPA_ADDR;
717 reg.addr = (uintptr_t)&env->vpa_addr;
718 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
719 if (ret < 0) {
720 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
721 return ret;
725 assert((uintptr_t)&env->slb_shadow_size
726 == ((uintptr_t)&env->slb_shadow_addr + 8));
727 reg.id = KVM_REG_PPC_VPA_SLB;
728 reg.addr = (uintptr_t)&env->slb_shadow_addr;
729 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
730 if (ret < 0) {
731 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
732 return ret;
735 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
736 reg.id = KVM_REG_PPC_VPA_DTL;
737 reg.addr = (uintptr_t)&env->dtl_addr;
738 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
739 if (ret < 0) {
740 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
741 strerror(errno));
742 return ret;
745 if (!env->vpa_addr) {
746 reg.id = KVM_REG_PPC_VPA_ADDR;
747 reg.addr = (uintptr_t)&env->vpa_addr;
748 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
749 if (ret < 0) {
750 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
751 return ret;
755 return 0;
757 #endif /* TARGET_PPC64 */
759 int kvm_arch_put_registers(CPUState *cs, int level)
761 PowerPCCPU *cpu = POWERPC_CPU(cs);
762 CPUPPCState *env = &cpu->env;
763 struct kvm_regs regs;
764 int ret;
765 int i;
767 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
768 if (ret < 0) {
769 return ret;
772 regs.ctr = env->ctr;
773 regs.lr = env->lr;
774 regs.xer = cpu_read_xer(env);
775 regs.msr = env->msr;
776 regs.pc = env->nip;
778 regs.srr0 = env->spr[SPR_SRR0];
779 regs.srr1 = env->spr[SPR_SRR1];
781 regs.sprg0 = env->spr[SPR_SPRG0];
782 regs.sprg1 = env->spr[SPR_SPRG1];
783 regs.sprg2 = env->spr[SPR_SPRG2];
784 regs.sprg3 = env->spr[SPR_SPRG3];
785 regs.sprg4 = env->spr[SPR_SPRG4];
786 regs.sprg5 = env->spr[SPR_SPRG5];
787 regs.sprg6 = env->spr[SPR_SPRG6];
788 regs.sprg7 = env->spr[SPR_SPRG7];
790 regs.pid = env->spr[SPR_BOOKE_PID];
792 for (i = 0;i < 32; i++)
793 regs.gpr[i] = env->gpr[i];
795 regs.cr = 0;
796 for (i = 0; i < 8; i++) {
797 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
800 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
801 if (ret < 0)
802 return ret;
804 kvm_put_fp(cs);
806 if (env->tlb_dirty) {
807 kvm_sw_tlb_put(cpu);
808 env->tlb_dirty = false;
811 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
812 struct kvm_sregs sregs;
814 sregs.pvr = env->spr[SPR_PVR];
816 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
818 /* Sync SLB */
819 #ifdef TARGET_PPC64
820 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
821 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
822 if (env->slb[i].esid & SLB_ESID_V) {
823 sregs.u.s.ppc64.slb[i].slbe |= i;
825 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
827 #endif
829 /* Sync SRs */
830 for (i = 0; i < 16; i++) {
831 sregs.u.s.ppc32.sr[i] = env->sr[i];
834 /* Sync BATs */
835 for (i = 0; i < 8; i++) {
836 /* Beware. We have to swap upper and lower bits here */
837 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
838 | env->DBAT[1][i];
839 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
840 | env->IBAT[1][i];
843 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
844 if (ret) {
845 return ret;
849 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
850 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
853 if (cap_one_reg) {
854 int i;
856 /* We deliberately ignore errors here, for kernels which have
857 * the ONE_REG calls, but don't support the specific
858 * registers, there's a reasonable chance things will still
859 * work, at least until we try to migrate. */
860 for (i = 0; i < 1024; i++) {
861 uint64_t id = env->spr_cb[i].one_reg_id;
863 if (id != 0) {
864 kvm_put_one_spr(cs, id, i);
868 #ifdef TARGET_PPC64
869 if (cap_papr) {
870 if (kvm_put_vpa(cs) < 0) {
871 DPRINTF("Warning: Unable to set VPA information to KVM\n");
874 #endif /* TARGET_PPC64 */
877 return ret;
880 int kvm_arch_get_registers(CPUState *cs)
882 PowerPCCPU *cpu = POWERPC_CPU(cs);
883 CPUPPCState *env = &cpu->env;
884 struct kvm_regs regs;
885 struct kvm_sregs sregs;
886 uint32_t cr;
887 int i, ret;
889 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
890 if (ret < 0)
891 return ret;
893 cr = regs.cr;
894 for (i = 7; i >= 0; i--) {
895 env->crf[i] = cr & 15;
896 cr >>= 4;
899 env->ctr = regs.ctr;
900 env->lr = regs.lr;
901 cpu_write_xer(env, regs.xer);
902 env->msr = regs.msr;
903 env->nip = regs.pc;
905 env->spr[SPR_SRR0] = regs.srr0;
906 env->spr[SPR_SRR1] = regs.srr1;
908 env->spr[SPR_SPRG0] = regs.sprg0;
909 env->spr[SPR_SPRG1] = regs.sprg1;
910 env->spr[SPR_SPRG2] = regs.sprg2;
911 env->spr[SPR_SPRG3] = regs.sprg3;
912 env->spr[SPR_SPRG4] = regs.sprg4;
913 env->spr[SPR_SPRG5] = regs.sprg5;
914 env->spr[SPR_SPRG6] = regs.sprg6;
915 env->spr[SPR_SPRG7] = regs.sprg7;
917 env->spr[SPR_BOOKE_PID] = regs.pid;
919 for (i = 0;i < 32; i++)
920 env->gpr[i] = regs.gpr[i];
922 kvm_get_fp(cs);
924 if (cap_booke_sregs) {
925 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
926 if (ret < 0) {
927 return ret;
930 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
931 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
932 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
933 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
934 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
935 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
936 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
937 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
938 env->spr[SPR_DECR] = sregs.u.e.dec;
939 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
940 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
941 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
944 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
945 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
946 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
947 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
948 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
949 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
952 if (sregs.u.e.features & KVM_SREGS_E_64) {
953 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
956 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
957 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
960 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
961 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
962 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
963 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
964 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
965 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
966 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
967 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
968 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
969 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
970 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
971 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
972 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
973 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
974 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
975 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
976 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
978 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
979 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
980 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
981 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
984 if (sregs.u.e.features & KVM_SREGS_E_PM) {
985 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
988 if (sregs.u.e.features & KVM_SREGS_E_PC) {
989 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
990 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
994 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
995 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
996 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
997 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
998 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
999 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1000 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1001 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1002 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1003 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1004 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1007 if (sregs.u.e.features & KVM_SREGS_EXP) {
1008 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1011 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1012 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1013 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1016 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1017 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1018 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1019 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1021 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1022 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1023 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1028 if (cap_segstate) {
1029 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1030 if (ret < 0) {
1031 return ret;
1034 if (!env->external_htab) {
1035 ppc_store_sdr1(env, sregs.u.s.sdr1);
1038 /* Sync SLB */
1039 #ifdef TARGET_PPC64
1041 * The packed SLB array we get from KVM_GET_SREGS only contains
1042 * information about valid entries. So we flush our internal
1043 * copy to get rid of stale ones, then put all valid SLB entries
1044 * back in.
1046 memset(env->slb, 0, sizeof(env->slb));
1047 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1048 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1049 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1051 * Only restore valid entries
1053 if (rb & SLB_ESID_V) {
1054 ppc_store_slb(env, rb, rs);
1057 #endif
1059 /* Sync SRs */
1060 for (i = 0; i < 16; i++) {
1061 env->sr[i] = sregs.u.s.ppc32.sr[i];
1064 /* Sync BATs */
1065 for (i = 0; i < 8; i++) {
1066 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1067 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1068 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1069 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1073 if (cap_hior) {
1074 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1077 if (cap_one_reg) {
1078 int i;
1080 /* We deliberately ignore errors here, for kernels which have
1081 * the ONE_REG calls, but don't support the specific
1082 * registers, there's a reasonable chance things will still
1083 * work, at least until we try to migrate. */
1084 for (i = 0; i < 1024; i++) {
1085 uint64_t id = env->spr_cb[i].one_reg_id;
1087 if (id != 0) {
1088 kvm_get_one_spr(cs, id, i);
1092 #ifdef TARGET_PPC64
1093 if (cap_papr) {
1094 if (kvm_get_vpa(cs) < 0) {
1095 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1098 #endif
1101 return 0;
1104 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1106 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1108 if (irq != PPC_INTERRUPT_EXT) {
1109 return 0;
1112 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1113 return 0;
1116 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1118 return 0;
1121 #if defined(TARGET_PPCEMB)
1122 #define PPC_INPUT_INT PPC40x_INPUT_INT
1123 #elif defined(TARGET_PPC64)
1124 #define PPC_INPUT_INT PPC970_INPUT_INT
1125 #else
1126 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1127 #endif
1129 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1131 PowerPCCPU *cpu = POWERPC_CPU(cs);
1132 CPUPPCState *env = &cpu->env;
1133 int r;
1134 unsigned irq;
1136 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1137 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1138 if (!cap_interrupt_level &&
1139 run->ready_for_interrupt_injection &&
1140 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1141 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1143 /* For now KVM disregards the 'irq' argument. However, in the
1144 * future KVM could cache it in-kernel to avoid a heavyweight exit
1145 * when reading the UIC.
1147 irq = KVM_INTERRUPT_SET;
1149 DPRINTF("injected interrupt %d\n", irq);
1150 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1151 if (r < 0) {
1152 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1155 /* Always wake up soon in case the interrupt was level based */
1156 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1157 (get_ticks_per_sec() / 50));
1160 /* We don't know if there are more interrupts pending after this. However,
1161 * the guest will return to userspace in the course of handling this one
1162 * anyways, so we will get a chance to deliver the rest. */
1165 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1169 int kvm_arch_process_async_events(CPUState *cs)
1171 return cs->halted;
1174 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1176 CPUState *cs = CPU(cpu);
1177 CPUPPCState *env = &cpu->env;
1179 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1180 cs->halted = 1;
1181 cs->exception_index = EXCP_HLT;
1184 return 0;
1187 /* map dcr access to existing qemu dcr emulation */
1188 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1190 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1191 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1193 return 0;
1196 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1198 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1199 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1201 return 0;
/* Top-level dispatcher for KVM exits that the generic code does not
 * handle itself.  Returns 0 to re-enter the guest, <0 to drop back to
 * the main loop with an error. */
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        /* Device Control Register access (BookE); emulated in QEMU. */
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        /* sPAPR hypercall not handled in-kernel; emulate it here and
         * hand the result back to KVM via run->papr_hcall.ret. */
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        /* External proxy: fetch the interrupt vector from the MPIC's
         * IACK register on behalf of the guest. */
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    return ret;
}
1253 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1255 CPUState *cs = CPU(cpu);
1256 uint32_t bits = tsr_bits;
1257 struct kvm_one_reg reg = {
1258 .id = KVM_REG_PPC_OR_TSR,
1259 .addr = (uintptr_t) &bits,
1262 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1265 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1268 CPUState *cs = CPU(cpu);
1269 uint32_t bits = tsr_bits;
1270 struct kvm_one_reg reg = {
1271 .id = KVM_REG_PPC_CLEAR_TSR,
1272 .addr = (uintptr_t) &bits,
1275 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1278 int kvmppc_set_tcr(PowerPCCPU *cpu)
1280 CPUState *cs = CPU(cpu);
1281 CPUPPCState *env = &cpu->env;
1282 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1284 struct kvm_one_reg reg = {
1285 .id = KVM_REG_PPC_TCR,
1286 .addr = (uintptr_t) &tcr,
1289 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1292 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1294 CPUState *cs = CPU(cpu);
1295 struct kvm_enable_cap encap = {};
1296 int ret;
1298 if (!kvm_enabled()) {
1299 return -1;
1302 if (!cap_ppc_watchdog) {
1303 printf("warning: KVM does not support watchdog");
1304 return -1;
1307 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1308 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1309 if (ret < 0) {
1310 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1311 __func__, strerror(-ret));
1312 return ret;
1315 return ret;
/* Scan /proc/cpuinfo for a line starting with 'field' and copy that whole
 * line (newline included) into 'value' (capacity 'len', always
 * NUL-terminated by pstrcpy).  Returns 0 on a match, -1 otherwise. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        /* Normalized 'if (' / 'while (' spacing to match file style. */
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
/* Return the host timebase frequency in Hz, parsed from the "timebase"
 * line of /proc/cpuinfo.  Falls back to get_ticks_per_sec() when the
 * field is missing or malformed. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    /* The line looks like "timebase : 512000000"; parse after the colon. */
    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    retval = atoi(ns);
    return retval;
}
/* Try to find a device tree node for a CPU with clock-frequency property */
/* On success, 'buf' holds the path of the first CPU node under
 * /proc/device-tree/cpus/ that has a readable clock-frequency property
 * and 0 is returned; on failure buf is "" and -1 is returned. */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        /* Probe for the property by trying to open it. */
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            /* Found one: rewrite buf to the node path itself. */
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
1399 /* Read a CPU node property from the host device tree that's a single
1400 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1401 * (can't find or open the property, or doesn't understand the
1402 * format) */
1403 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1405 char buf[PATH_MAX];
1406 union {
1407 uint32_t v32;
1408 uint64_t v64;
1409 } u;
1410 FILE *f;
1411 int len;
1413 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1414 return -1;
1417 strncat(buf, "/", sizeof(buf) - strlen(buf));
1418 strncat(buf, propname, sizeof(buf) - strlen(buf));
1420 f = fopen(buf, "rb");
1421 if (!f) {
1422 return -1;
1425 len = fread(&u, 1, sizeof(u), f);
1426 fclose(f);
1427 switch (len) {
1428 case 4:
1429 /* property is a 32-bit quantity */
1430 return be32_to_cpu(u.v32);
1431 case 8:
1432 return be64_to_cpu(u.v64);
1435 return 0;
/* Host CPU clock frequency in Hz from the device tree (0 on failure). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host Altivec/VSX level from the "ibm,vmx" device-tree property
 * (0 = none; -1/0 also returned on lookup failure). */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host decimal-floating-point support from the "ibm,dfp" property. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
/* Fetch the paravirt info (hypercall instructions, flags) from KVM.
 * Note the inverted convention: returns 0 on success, 1 on failure. */
static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
{
    PowerPCCPU *cpu = ppc_env_get_cpu(env);
    CPUState *cs = CPU(cpu);

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
        !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
        return 0;
    }

    return 1;
}
/* Return 1 if the hypervisor advertises the EV_IDLE hypercall, else 0. */
int kvmppc_get_hasidle(CPUPPCState *env)
{
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo) &&
        (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
        return 1;
    }

    return 0;
}
/* Copy the guest-visible hypercall instruction sequence into 'buf'.
 * If KVM provides no pvinfo, emit a stub that makes every hypercall
 * fail (r3 = -1).  Always returns 0.
 * NOTE(review): the memcpy copies buf_len bytes out of pvinfo.hcall —
 * assumes callers never pass buf_len larger than that array; confirm
 * against callers. */
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls:
     *
     *     li r3, -1
     *     nop
     *     nop
     *     nop
     */

    hc[0] = 0x3860ffff;   /* li r3, -1 */
    hc[1] = 0x60000000;   /* nop */
    hc[2] = 0x60000000;   /* nop */
    hc[3] = 0x60000000;   /* nop */

    return 0;
}
/* Put the vCPU into PAPR (sPAPR guest) mode.  Aborts the CPU if the
 * host KVM lacks KVM_CAP_PPC_PAPR — there is no fallback. */
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_PAPR;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret) {
        cpu_abort(cs, "This KVM version does not support PAPR\n");
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}
/* Enable/disable MPIC interrupt proxying (KVM_CAP_PPC_EPR) for the vCPU.
 * Failure is only fatal when proxying was actually requested. */
void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
{
    CPUState *cs = CPU(cpu);
    struct kvm_enable_cap cap = {};
    int ret;

    cap.cap = KVM_CAP_PPC_EPR;
    cap.args[0] = mpic_proxy;
    ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);

    if (ret && mpic_proxy) {
        cpu_abort(cs, "This KVM version does not support EPR\n");
    }
}
1538 int kvmppc_smt_threads(void)
1540 return cap_ppc_smt ? cap_ppc_smt : 1;
1543 #ifdef TARGET_PPC64
/* Allocate a contiguous Real Mode Area through KVM and map it at guest
 * physical 0.  Returns the RMA size in bytes, 0 if no contiguous RMA is
 * required on this host, or -1 on error. */
off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
{
    void *rma;
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;
    MemoryRegion *rma_region;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     * not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    /* The ioctl returns an fd backing the RMA and fills ret.rma_size. */
    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    /* Cap the mapped RMA at 256MiB. */
    size = MIN(ret.rma_size, 256ul << 20);

    rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        /* NOTE(review): fd stays open here; presumably harmless since we
         * are on a fatal path — confirm whether it should be closed. */
        return -1;
    };

    rma_region = g_new(MemoryRegion, 1);
    memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
    vmstate_register_ram_global(rma_region);
    memory_region_add_subregion(sysmem, 0, rma_region);

    return size;
}
/* Clamp the requested RMA size to what the hash table geometry can
 * cover, given the host's supported page sizes and the backing page
 * size of guest RAM. */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    /* With real contiguous-RMA hardware the kernel already sized it. */
    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    /* hash_shift - 7: presumably one 128-byte HPTE group per unit of
     * coverage — TODO confirm against the ISA HPT sizing rules. */
    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
1619 #endif
/* Create an in-kernel TCE (IOMMU translation) table for the given LIOBN
 * and mmap it into QEMU.  On success returns the mapped table and stores
 * the table fd in *pfd; on failure returns NULL with *pfd == -1. */
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    /* One 64-bit TCE entry per IOMMU page in the DMA window. */
    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
/* Tear down a TCE table created by kvmppc_create_spapr_tce.
 * 'fd' < 0 means the table was never created in-kernel (see the *pfd
 * contract there); returns 0 on success, -1 otherwise. */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    /* Must match the length computation used when mapping. */
    len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
/* (Re)allocate the guest hash page table in the kernel if possible.
 * Returns the shift of the kernel-allocated HTAB, or 0 when QEMU must
 * allocate the HTAB itself, or a negative errno on failure. */
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        /* The ioctl takes the hint in 'shift' and writes back the size
         * it actually allocated. */
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
/* Read the host Processor Version Register via the mfpvr instruction
 * (this file is only compiled for ppc hosts when KVM is enabled). */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
/* Set (on == true) or clear (on == false) the given flag bits in the
 * instruction-set flags word pointed to by 'word'. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
/* Instance init for the "host" CPU type; it only exists under KVM. */
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}
/* Class init for the "host" CPU type: override the model defaults with
 * what the real host CPU reports (PVR, Altivec/VSX/DFP, cache sizes). */
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    /* The device-tree readers return (uint32_t)-1 on failure; the
     * comparisons below rely on that sentinel. */
    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}
/* Whether the host KVM supports external-proxy (EPR) delivery. */
bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}
/* Whether the host KVM supports the hash-table fd (HTAB migration). */
bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}
/* Register the "host" CPU QOM type, parented on the model class that
 * matches the host PVR (exact match first, then mask match).
 * Returns 0 on success, -1 if the PVR is unknown. */
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    return 0;
}
1803 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1805 struct kvm_rtas_token_args args = {
1806 .token = token,
1809 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1810 return -ENOENT;
1813 strncpy(args.name, function, sizeof(args.name));
1815 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
/* Open a hash-table stream fd from KVM, starting at index 0, for
 * reading (migration save) or writing (migration load) depending on
 * 'write'.  Returns the fd, or -1 when unsupported / a negative errno
 * from the ioctl. */
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}
/* Stream hash-table chunks from the KVM htab fd into the migration
 * stream, stopping after max_ns nanoseconds (if >= 0) or at EOF.
 * Returns 1 when the table has been fully read, 0 when interrupted by
 * the time budget, or a negative errno on read failure. */
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Kernel already returns data in BE format for the file */
            qemu_put_buffer(f, buf, rc);
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
/* Read one HTAB chunk header+entries from the migration stream and push
 * it into KVM through the htab fd.  Returns 0 on success, a negative
 * errno on write failure, or -1 on a short write. */
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
    ssize_t rc;

    /* Chunk size is bounded by n_valid (uint16_t), so this stack
     * allocation stays small; freed automatically on return. */
    buf = alloca(chunksize);
    /* This is KVM on ppc, so this is all big-endian */
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    /* The HPTE payload follows the header directly in the stream. */
    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}
/* On ppc, an emulation failure always stops the VM for inspection. */
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}
/* SIGBUS on a vCPU thread is not handled on ppc; returning non-zero
 * tells the generic code we did not consume it. */
int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}
/* Main-thread SIGBUS: likewise unhandled on ppc. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
/* No arch-specific IRQ routing setup is needed on ppc. */
void kvm_arch_init_irq_routing(KVMState *s)
{
}
/* Guest debugging is not implemented for ppc KVM yet. */
int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}
/* Guest debugging is not implemented for ppc KVM yet. */
int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
{
    return -EINVAL;
}
/* Hardware breakpoints are not implemented for ppc KVM yet. */
int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}
/* Hardware breakpoints are not implemented for ppc KVM yet. */
int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
{
    return -EINVAL;
}
/* Nothing to remove: hardware breakpoints are unsupported on ppc KVM. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
}
/* No guest-debug state to propagate to KVM on ppc. */
void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
{
}
/* Buffer layout used when reading/writing a single HPTE group through
 * the KVM htab fd: header followed by the HPTE payload. */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
/* Read the HPTE group starting at pte_index from KVM.  Returns an opaque
 * token (actually a pointer into a heap-allocated kvm_get_htab_buf) that
 * the caller must release with kvmppc_hash64_free_pteg, or 0 on error. */
uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    close(htab_fd);
    /* Token is the address of the hpte payload; free_pteg recovers the
     * containing buffer with container_of. */
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}
/* Release a token returned by kvmppc_hash64_read_pteg: recover the
 * enclosing buffer from the hpte pointer and free it. */
void kvmppc_hash64_free_pteg(uint64_t token)
{
    struct kvm_get_htab_buf *htab_buf;

    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
                            hpte);
    g_free(htab_buf);
    return;
}
1981 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1982 target_ulong pte0, target_ulong pte1)
1984 int htab_fd;
1985 struct kvm_get_htab_fd ghf;
1986 struct kvm_get_htab_buf hpte_buf;
1988 ghf.flags = 0;
1989 ghf.start_index = 0; /* Ignored */
1990 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1991 if (htab_fd < 0) {
1992 goto error_out;
1995 hpte_buf.header.n_valid = 1;
1996 hpte_buf.header.n_invalid = 0;
1997 hpte_buf.header.index = pte_index;
1998 hpte_buf.hpte[0] = pte0;
1999 hpte_buf.hpte[1] = pte1;
2001 * Write the hpte entry.
2002 * CAUTION: write() has the warn_unused_result attribute. Hence we
2003 * need to check the return value, even though we do nothing.
2005 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2006 goto out_close;
2009 out_close:
2010 close(htab_fd);
2011 return;
2013 error_out:
2014 return;