qdev: Fix crash by validating the object type
[qemu/cris-port.git] / target-ppc / kvm.c
blob4b81e5f253277f1a39fae7339d038d3c5b546e60
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
39 #include "trace.h"
41 //#define DEBUG_KVM
43 #ifdef DEBUG_KVM
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #else
47 #define DPRINTF(fmt, ...) \
48 do { } while (0)
49 #endif
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
54 KVM_CAP_LAST_INFO
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
64 static int cap_hior;
65 static int cap_one_reg;
66 static int cap_epr;
67 static int cap_ppc_watchdog;
68 static int cap_papr;
69 static int cap_htab_fd;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can assure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
80 static QEMUTimer *idle_timer;
82 static void kvm_kick_cpu(void *opaque)
84 PowerPCCPU *cpu = opaque;
86 qemu_cpu_kick(CPU(cpu));
89 static int kvm_ppc_register_host_cpu_type(void);
91 int kvm_arch_init(KVMState *s)
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
108 if (!cap_interrupt_level) {
109 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
115 return 0;
118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
120 CPUPPCState *cenv = &cpu->env;
121 CPUState *cs = CPU(cpu);
122 struct kvm_sregs sregs;
123 int ret;
125 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users that they can run
129 BookE guests on BookS. Let's hope nobody dares enough :) */
130 return 0;
131 } else {
132 if (!cap_segstate) {
133 fprintf(stderr, "kvm error: missing PVR setting capability\n");
134 return -ENOSYS;
138 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
139 if (ret) {
140 return ret;
143 sregs.pvr = cenv->spr[SPR_PVR];
144 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
147 /* Set up a shared TLB array with KVM */
148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
150 CPUPPCState *env = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_book3e_206_tlb_params params = {};
153 struct kvm_config_tlb cfg = {};
154 unsigned int entries = 0;
155 int ret, i;
157 if (!kvm_enabled() ||
158 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
159 return 0;
162 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
164 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
165 params.tlb_sizes[i] = booke206_tlb_size(env, i);
166 params.tlb_ways[i] = booke206_tlb_ways(env, i);
167 entries += params.tlb_sizes[i];
170 assert(entries == env->nb_tlb);
171 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
173 env->tlb_dirty = true;
175 cfg.array = (uintptr_t)env->tlb.tlbm;
176 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
177 cfg.params = (uintptr_t)&params;
178 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
180 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
181 if (ret < 0) {
182 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
183 __func__, strerror(-ret));
184 return ret;
187 env->kvm_sw_tlb = true;
188 return 0;
192 #if defined(TARGET_PPC64)
193 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
194 struct kvm_ppc_smmu_info *info)
196 CPUPPCState *env = &cpu->env;
197 CPUState *cs = CPU(cpu);
199 memset(info, 0, sizeof(*info));
201 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
202 * need to "guess" what the supported page sizes are.
204 * For that to work we make a few assumptions:
206 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
207 * KVM which only supports 4K and 16M pages, but supports them
208 * regardless of the backing store characteritics. We also don't
209 * support 1T segments.
211 * This is safe as if HV KVM ever supports that capability or PR
212 * KVM grows supports for more page/segment sizes, those versions
213 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
214 * will not hit this fallback
216 * - Else we are running HV KVM. This means we only support page
217 * sizes that fit in the backing store. Additionally we only
218 * advertize 64K pages if the processor is ARCH 2.06 and we assume
219 * P7 encodings for the SLB and hash table. Here too, we assume
220 * support for any newer processor will mean a kernel that
221 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
222 * this fallback.
224 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
225 /* No flags */
226 info->flags = 0;
227 info->slb_size = 64;
229 /* Standard 4k base page size segment */
230 info->sps[0].page_shift = 12;
231 info->sps[0].slb_enc = 0;
232 info->sps[0].enc[0].page_shift = 12;
233 info->sps[0].enc[0].pte_enc = 0;
235 /* Standard 16M large page size segment */
236 info->sps[1].page_shift = 24;
237 info->sps[1].slb_enc = SLB_VSID_L;
238 info->sps[1].enc[0].page_shift = 24;
239 info->sps[1].enc[0].pte_enc = 0;
240 } else {
241 int i = 0;
243 /* HV KVM has backing store size restrictions */
244 info->flags = KVM_PPC_PAGE_SIZES_REAL;
246 if (env->mmu_model & POWERPC_MMU_1TSEG) {
247 info->flags |= KVM_PPC_1T_SEGMENTS;
250 if (env->mmu_model == POWERPC_MMU_2_06) {
251 info->slb_size = 32;
252 } else {
253 info->slb_size = 64;
256 /* Standard 4k base page size segment */
257 info->sps[i].page_shift = 12;
258 info->sps[i].slb_enc = 0;
259 info->sps[i].enc[0].page_shift = 12;
260 info->sps[i].enc[0].pte_enc = 0;
261 i++;
263 /* 64K on MMU 2.06 */
264 if (env->mmu_model == POWERPC_MMU_2_06) {
265 info->sps[i].page_shift = 16;
266 info->sps[i].slb_enc = 0x110;
267 info->sps[i].enc[0].page_shift = 16;
268 info->sps[i].enc[0].pte_enc = 1;
269 i++;
272 /* Standard 16M large page size segment */
273 info->sps[i].page_shift = 24;
274 info->sps[i].slb_enc = SLB_VSID_L;
275 info->sps[i].enc[0].page_shift = 24;
276 info->sps[i].enc[0].pte_enc = 0;
280 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
282 CPUState *cs = CPU(cpu);
283 int ret;
285 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
286 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
287 if (ret == 0) {
288 return;
292 kvm_get_fallback_smmu_info(cpu, info);
295 static long getrampagesize(void)
297 struct statfs fs;
298 int ret;
300 if (!mem_path) {
301 /* guest RAM is backed by normal anonymous pages */
302 return getpagesize();
305 do {
306 ret = statfs(mem_path, &fs);
307 } while (ret != 0 && errno == EINTR);
309 if (ret != 0) {
310 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
311 strerror(errno));
312 exit(1);
315 #define HUGETLBFS_MAGIC 0x958458f6
317 if (fs.f_type != HUGETLBFS_MAGIC) {
318 /* Explicit mempath, but it's ordinary pages */
319 return getpagesize();
322 /* It's hugepage, return the huge page size */
323 return fs.f_bsize;
/* A page size is usable when the backing store restriction does not apply,
 * or when 2^shift fits within the RAM backing page size. */
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (flags & KVM_PPC_PAGE_SIZES_REAL) {
        return (1ul << shift) <= rampgsize;
    }

    return true;
}
335 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
337 static struct kvm_ppc_smmu_info smmu_info;
338 static bool has_smmu_info;
339 CPUPPCState *env = &cpu->env;
340 long rampagesize;
341 int iq, ik, jq, jk;
343 /* We only handle page sizes for 64-bit server guests for now */
344 if (!(env->mmu_model & POWERPC_MMU_64)) {
345 return;
348 /* Collect MMU info from kernel if not already */
349 if (!has_smmu_info) {
350 kvm_get_smmu_info(cpu, &smmu_info);
351 has_smmu_info = true;
354 rampagesize = getrampagesize();
356 /* Convert to QEMU form */
357 memset(&env->sps, 0, sizeof(env->sps));
359 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
360 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
361 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
363 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
364 ksps->page_shift)) {
365 continue;
367 qsps->page_shift = ksps->page_shift;
368 qsps->slb_enc = ksps->slb_enc;
369 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
370 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
371 ksps->enc[jk].page_shift)) {
372 continue;
374 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
375 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
376 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
377 break;
380 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
381 break;
384 env->slb_nr = smmu_info.slb_size;
385 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
386 env->mmu_model |= POWERPC_MMU_1TSEG;
387 } else {
388 env->mmu_model &= ~POWERPC_MMU_1TSEG;
391 #else /* defined (TARGET_PPC64) */
393 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
397 #endif /* !defined (TARGET_PPC64) */
399 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
401 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
404 int kvm_arch_init_vcpu(CPUState *cs)
406 PowerPCCPU *cpu = POWERPC_CPU(cs);
407 CPUPPCState *cenv = &cpu->env;
408 int ret;
410 /* Gather server mmu info from KVM and update the CPU state */
411 kvm_fixup_page_sizes(cpu);
413 /* Synchronize sregs with kvm */
414 ret = kvm_arch_sync_sregs(cpu);
415 if (ret) {
416 return ret;
419 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
421 /* Some targets support access to KVM's guest TLB. */
422 switch (cenv->mmu_model) {
423 case POWERPC_MMU_BOOKE206:
424 ret = kvm_booke206_tlb_init(cpu);
425 break;
426 default:
427 break;
430 return ret;
433 void kvm_arch_reset_vcpu(CPUState *cpu)
437 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
439 CPUPPCState *env = &cpu->env;
440 CPUState *cs = CPU(cpu);
441 struct kvm_dirty_tlb dirty_tlb;
442 unsigned char *bitmap;
443 int ret;
445 if (!env->kvm_sw_tlb) {
446 return;
449 bitmap = g_malloc((env->nb_tlb + 7) / 8);
450 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
452 dirty_tlb.bitmap = (uintptr_t)bitmap;
453 dirty_tlb.num_dirty = env->nb_tlb;
455 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
456 if (ret) {
457 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
458 __func__, strerror(-ret));
461 g_free(bitmap);
464 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
466 PowerPCCPU *cpu = POWERPC_CPU(cs);
467 CPUPPCState *env = &cpu->env;
468 union {
469 uint32_t u32;
470 uint64_t u64;
471 } val;
472 struct kvm_one_reg reg = {
473 .id = id,
474 .addr = (uintptr_t) &val,
476 int ret;
478 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
479 if (ret != 0) {
480 trace_kvm_failed_spr_get(spr, strerror(errno));
481 } else {
482 switch (id & KVM_REG_SIZE_MASK) {
483 case KVM_REG_SIZE_U32:
484 env->spr[spr] = val.u32;
485 break;
487 case KVM_REG_SIZE_U64:
488 env->spr[spr] = val.u64;
489 break;
491 default:
492 /* Don't handle this size yet */
493 abort();
498 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
500 PowerPCCPU *cpu = POWERPC_CPU(cs);
501 CPUPPCState *env = &cpu->env;
502 union {
503 uint32_t u32;
504 uint64_t u64;
505 } val;
506 struct kvm_one_reg reg = {
507 .id = id,
508 .addr = (uintptr_t) &val,
510 int ret;
512 switch (id & KVM_REG_SIZE_MASK) {
513 case KVM_REG_SIZE_U32:
514 val.u32 = env->spr[spr];
515 break;
517 case KVM_REG_SIZE_U64:
518 val.u64 = env->spr[spr];
519 break;
521 default:
522 /* Don't handle this size yet */
523 abort();
526 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
527 if (ret != 0) {
528 trace_kvm_failed_spr_set(spr, strerror(errno));
532 static int kvm_put_fp(CPUState *cs)
534 PowerPCCPU *cpu = POWERPC_CPU(cs);
535 CPUPPCState *env = &cpu->env;
536 struct kvm_one_reg reg;
537 int i;
538 int ret;
540 if (env->insns_flags & PPC_FLOAT) {
541 uint64_t fpscr = env->fpscr;
542 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
544 reg.id = KVM_REG_PPC_FPSCR;
545 reg.addr = (uintptr_t)&fpscr;
546 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
547 if (ret < 0) {
548 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
549 return ret;
552 for (i = 0; i < 32; i++) {
553 uint64_t vsr[2];
555 vsr[0] = float64_val(env->fpr[i]);
556 vsr[1] = env->vsr[i];
557 reg.addr = (uintptr_t) &vsr;
558 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
560 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
561 if (ret < 0) {
562 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
563 i, strerror(errno));
564 return ret;
569 if (env->insns_flags & PPC_ALTIVEC) {
570 reg.id = KVM_REG_PPC_VSCR;
571 reg.addr = (uintptr_t)&env->vscr;
572 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
573 if (ret < 0) {
574 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
575 return ret;
578 for (i = 0; i < 32; i++) {
579 reg.id = KVM_REG_PPC_VR(i);
580 reg.addr = (uintptr_t)&env->avr[i];
581 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
582 if (ret < 0) {
583 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
584 return ret;
589 return 0;
592 static int kvm_get_fp(CPUState *cs)
594 PowerPCCPU *cpu = POWERPC_CPU(cs);
595 CPUPPCState *env = &cpu->env;
596 struct kvm_one_reg reg;
597 int i;
598 int ret;
600 if (env->insns_flags & PPC_FLOAT) {
601 uint64_t fpscr;
602 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
604 reg.id = KVM_REG_PPC_FPSCR;
605 reg.addr = (uintptr_t)&fpscr;
606 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
607 if (ret < 0) {
608 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
609 return ret;
610 } else {
611 env->fpscr = fpscr;
614 for (i = 0; i < 32; i++) {
615 uint64_t vsr[2];
617 reg.addr = (uintptr_t) &vsr;
618 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
620 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
621 if (ret < 0) {
622 DPRINTF("Unable to get %s%d from KVM: %s\n",
623 vsx ? "VSR" : "FPR", i, strerror(errno));
624 return ret;
625 } else {
626 env->fpr[i] = vsr[0];
627 if (vsx) {
628 env->vsr[i] = vsr[1];
634 if (env->insns_flags & PPC_ALTIVEC) {
635 reg.id = KVM_REG_PPC_VSCR;
636 reg.addr = (uintptr_t)&env->vscr;
637 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
638 if (ret < 0) {
639 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
640 return ret;
643 for (i = 0; i < 32; i++) {
644 reg.id = KVM_REG_PPC_VR(i);
645 reg.addr = (uintptr_t)&env->avr[i];
646 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
647 if (ret < 0) {
648 DPRINTF("Unable to get VR%d from KVM: %s\n",
649 i, strerror(errno));
650 return ret;
655 return 0;
658 #if defined(TARGET_PPC64)
659 static int kvm_get_vpa(CPUState *cs)
661 PowerPCCPU *cpu = POWERPC_CPU(cs);
662 CPUPPCState *env = &cpu->env;
663 struct kvm_one_reg reg;
664 int ret;
666 reg.id = KVM_REG_PPC_VPA_ADDR;
667 reg.addr = (uintptr_t)&env->vpa_addr;
668 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
669 if (ret < 0) {
670 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
671 return ret;
674 assert((uintptr_t)&env->slb_shadow_size
675 == ((uintptr_t)&env->slb_shadow_addr + 8));
676 reg.id = KVM_REG_PPC_VPA_SLB;
677 reg.addr = (uintptr_t)&env->slb_shadow_addr;
678 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
679 if (ret < 0) {
680 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
681 strerror(errno));
682 return ret;
685 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
686 reg.id = KVM_REG_PPC_VPA_DTL;
687 reg.addr = (uintptr_t)&env->dtl_addr;
688 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
689 if (ret < 0) {
690 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
691 strerror(errno));
692 return ret;
695 return 0;
698 static int kvm_put_vpa(CPUState *cs)
700 PowerPCCPU *cpu = POWERPC_CPU(cs);
701 CPUPPCState *env = &cpu->env;
702 struct kvm_one_reg reg;
703 int ret;
705 /* SLB shadow or DTL can't be registered unless a master VPA is
706 * registered. That means when restoring state, if a VPA *is*
707 * registered, we need to set that up first. If not, we need to
708 * deregister the others before deregistering the master VPA */
709 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
711 if (env->vpa_addr) {
712 reg.id = KVM_REG_PPC_VPA_ADDR;
713 reg.addr = (uintptr_t)&env->vpa_addr;
714 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
715 if (ret < 0) {
716 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
717 return ret;
721 assert((uintptr_t)&env->slb_shadow_size
722 == ((uintptr_t)&env->slb_shadow_addr + 8));
723 reg.id = KVM_REG_PPC_VPA_SLB;
724 reg.addr = (uintptr_t)&env->slb_shadow_addr;
725 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
726 if (ret < 0) {
727 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
728 return ret;
731 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
732 reg.id = KVM_REG_PPC_VPA_DTL;
733 reg.addr = (uintptr_t)&env->dtl_addr;
734 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
737 strerror(errno));
738 return ret;
741 if (!env->vpa_addr) {
742 reg.id = KVM_REG_PPC_VPA_ADDR;
743 reg.addr = (uintptr_t)&env->vpa_addr;
744 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745 if (ret < 0) {
746 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
747 return ret;
751 return 0;
753 #endif /* TARGET_PPC64 */
755 int kvm_arch_put_registers(CPUState *cs, int level)
757 PowerPCCPU *cpu = POWERPC_CPU(cs);
758 CPUPPCState *env = &cpu->env;
759 struct kvm_regs regs;
760 int ret;
761 int i;
763 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
764 if (ret < 0) {
765 return ret;
768 regs.ctr = env->ctr;
769 regs.lr = env->lr;
770 regs.xer = cpu_read_xer(env);
771 regs.msr = env->msr;
772 regs.pc = env->nip;
774 regs.srr0 = env->spr[SPR_SRR0];
775 regs.srr1 = env->spr[SPR_SRR1];
777 regs.sprg0 = env->spr[SPR_SPRG0];
778 regs.sprg1 = env->spr[SPR_SPRG1];
779 regs.sprg2 = env->spr[SPR_SPRG2];
780 regs.sprg3 = env->spr[SPR_SPRG3];
781 regs.sprg4 = env->spr[SPR_SPRG4];
782 regs.sprg5 = env->spr[SPR_SPRG5];
783 regs.sprg6 = env->spr[SPR_SPRG6];
784 regs.sprg7 = env->spr[SPR_SPRG7];
786 regs.pid = env->spr[SPR_BOOKE_PID];
788 for (i = 0;i < 32; i++)
789 regs.gpr[i] = env->gpr[i];
791 regs.cr = 0;
792 for (i = 0; i < 8; i++) {
793 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
796 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
797 if (ret < 0)
798 return ret;
800 kvm_put_fp(cs);
802 if (env->tlb_dirty) {
803 kvm_sw_tlb_put(cpu);
804 env->tlb_dirty = false;
807 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
808 struct kvm_sregs sregs;
810 sregs.pvr = env->spr[SPR_PVR];
812 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
814 /* Sync SLB */
815 #ifdef TARGET_PPC64
816 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
817 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
818 if (env->slb[i].esid & SLB_ESID_V) {
819 sregs.u.s.ppc64.slb[i].slbe |= i;
821 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
823 #endif
825 /* Sync SRs */
826 for (i = 0; i < 16; i++) {
827 sregs.u.s.ppc32.sr[i] = env->sr[i];
830 /* Sync BATs */
831 for (i = 0; i < 8; i++) {
832 /* Beware. We have to swap upper and lower bits here */
833 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
834 | env->DBAT[1][i];
835 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
836 | env->IBAT[1][i];
839 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
840 if (ret) {
841 return ret;
845 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
846 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
849 if (cap_one_reg) {
850 int i;
852 /* We deliberately ignore errors here, for kernels which have
853 * the ONE_REG calls, but don't support the specific
854 * registers, there's a reasonable chance things will still
855 * work, at least until we try to migrate. */
856 for (i = 0; i < 1024; i++) {
857 uint64_t id = env->spr_cb[i].one_reg_id;
859 if (id != 0) {
860 kvm_put_one_spr(cs, id, i);
864 #ifdef TARGET_PPC64
865 if (cap_papr) {
866 if (kvm_put_vpa(cs) < 0) {
867 DPRINTF("Warning: Unable to set VPA information to KVM\n");
870 #endif /* TARGET_PPC64 */
873 return ret;
876 int kvm_arch_get_registers(CPUState *cs)
878 PowerPCCPU *cpu = POWERPC_CPU(cs);
879 CPUPPCState *env = &cpu->env;
880 struct kvm_regs regs;
881 struct kvm_sregs sregs;
882 uint32_t cr;
883 int i, ret;
885 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
886 if (ret < 0)
887 return ret;
889 cr = regs.cr;
890 for (i = 7; i >= 0; i--) {
891 env->crf[i] = cr & 15;
892 cr >>= 4;
895 env->ctr = regs.ctr;
896 env->lr = regs.lr;
897 cpu_write_xer(env, regs.xer);
898 env->msr = regs.msr;
899 env->nip = regs.pc;
901 env->spr[SPR_SRR0] = regs.srr0;
902 env->spr[SPR_SRR1] = regs.srr1;
904 env->spr[SPR_SPRG0] = regs.sprg0;
905 env->spr[SPR_SPRG1] = regs.sprg1;
906 env->spr[SPR_SPRG2] = regs.sprg2;
907 env->spr[SPR_SPRG3] = regs.sprg3;
908 env->spr[SPR_SPRG4] = regs.sprg4;
909 env->spr[SPR_SPRG5] = regs.sprg5;
910 env->spr[SPR_SPRG6] = regs.sprg6;
911 env->spr[SPR_SPRG7] = regs.sprg7;
913 env->spr[SPR_BOOKE_PID] = regs.pid;
915 for (i = 0;i < 32; i++)
916 env->gpr[i] = regs.gpr[i];
918 kvm_get_fp(cs);
920 if (cap_booke_sregs) {
921 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
922 if (ret < 0) {
923 return ret;
926 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
927 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
928 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
929 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
930 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
931 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
932 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
933 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
934 env->spr[SPR_DECR] = sregs.u.e.dec;
935 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
936 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
937 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
940 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
941 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
942 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
943 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
944 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
945 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
948 if (sregs.u.e.features & KVM_SREGS_E_64) {
949 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
952 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
953 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
956 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
957 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
958 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
959 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
960 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
961 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
962 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
963 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
964 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
965 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
966 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
967 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
968 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
969 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
970 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
971 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
972 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
974 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
975 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
976 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
977 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
980 if (sregs.u.e.features & KVM_SREGS_E_PM) {
981 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
984 if (sregs.u.e.features & KVM_SREGS_E_PC) {
985 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
986 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
990 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
991 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
992 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
993 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
994 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
995 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
996 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
997 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
998 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
999 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1000 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1003 if (sregs.u.e.features & KVM_SREGS_EXP) {
1004 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1007 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1008 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1009 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1012 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1013 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1014 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1015 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1017 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1018 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1019 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1024 if (cap_segstate) {
1025 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1026 if (ret < 0) {
1027 return ret;
1030 if (!env->external_htab) {
1031 ppc_store_sdr1(env, sregs.u.s.sdr1);
1034 /* Sync SLB */
1035 #ifdef TARGET_PPC64
1037 * The packed SLB array we get from KVM_GET_SREGS only contains
1038 * information about valid entries. So we flush our internal
1039 * copy to get rid of stale ones, then put all valid SLB entries
1040 * back in.
1042 memset(env->slb, 0, sizeof(env->slb));
1043 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1044 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1045 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1047 * Only restore valid entries
1049 if (rb & SLB_ESID_V) {
1050 ppc_store_slb(env, rb, rs);
1053 #endif
1055 /* Sync SRs */
1056 for (i = 0; i < 16; i++) {
1057 env->sr[i] = sregs.u.s.ppc32.sr[i];
1060 /* Sync BATs */
1061 for (i = 0; i < 8; i++) {
1062 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1063 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1064 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1065 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1069 if (cap_hior) {
1070 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1073 if (cap_one_reg) {
1074 int i;
1076 /* We deliberately ignore errors here, for kernels which have
1077 * the ONE_REG calls, but don't support the specific
1078 * registers, there's a reasonable chance things will still
1079 * work, at least until we try to migrate. */
1080 for (i = 0; i < 1024; i++) {
1081 uint64_t id = env->spr_cb[i].one_reg_id;
1083 if (id != 0) {
1084 kvm_get_one_spr(cs, id, i);
1088 #ifdef TARGET_PPC64
1089 if (cap_papr) {
1090 if (kvm_get_vpa(cs) < 0) {
1091 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1094 #endif
1097 return 0;
1100 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1102 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1104 if (irq != PPC_INTERRUPT_EXT) {
1105 return 0;
1108 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1109 return 0;
1112 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1114 return 0;
1117 #if defined(TARGET_PPCEMB)
1118 #define PPC_INPUT_INT PPC40x_INPUT_INT
1119 #elif defined(TARGET_PPC64)
1120 #define PPC_INPUT_INT PPC970_INPUT_INT
1121 #else
1122 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1123 #endif
1125 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1127 PowerPCCPU *cpu = POWERPC_CPU(cs);
1128 CPUPPCState *env = &cpu->env;
1129 int r;
1130 unsigned irq;
1132 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1133 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1134 if (!cap_interrupt_level &&
1135 run->ready_for_interrupt_injection &&
1136 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1137 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1139 /* For now KVM disregards the 'irq' argument. However, in the
1140 * future KVM could cache it in-kernel to avoid a heavyweight exit
1141 * when reading the UIC.
1143 irq = KVM_INTERRUPT_SET;
1145 DPRINTF("injected interrupt %d\n", irq);
1146 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1147 if (r < 0) {
1148 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1151 /* Always wake up soon in case the interrupt was level based */
1152 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1153 (get_ticks_per_sec() / 50));
1156 /* We don't know if there are more interrupts pending after this. However,
1157 * the guest will return to userspace in the course of handling this one
1158 * anyways, so we will get a chance to deliver the rest. */
1161 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1165 int kvm_arch_process_async_events(CPUState *cs)
1167 return cs->halted;
1170 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1172 CPUState *cs = CPU(cpu);
1173 CPUPPCState *env = &cpu->env;
1175 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1176 cs->halted = 1;
1177 cs->exception_index = EXCP_HLT;
1180 return 0;
1183 /* map dcr access to existing qemu dcr emulation */
1184 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1186 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1187 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1189 return 0;
1192 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1194 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1195 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1197 return 0;
1200 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1202 PowerPCCPU *cpu = POWERPC_CPU(cs);
1203 CPUPPCState *env = &cpu->env;
1204 int ret;
1206 switch (run->exit_reason) {
1207 case KVM_EXIT_DCR:
1208 if (run->dcr.is_write) {
1209 DPRINTF("handle dcr write\n");
1210 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1211 } else {
1212 DPRINTF("handle dcr read\n");
1213 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1215 break;
1216 case KVM_EXIT_HLT:
1217 DPRINTF("handle halt\n");
1218 ret = kvmppc_handle_halt(cpu);
1219 break;
1220 #if defined(TARGET_PPC64)
1221 case KVM_EXIT_PAPR_HCALL:
1222 DPRINTF("handle PAPR hypercall\n");
1223 run->papr_hcall.ret = spapr_hypercall(cpu,
1224 run->papr_hcall.nr,
1225 run->papr_hcall.args);
1226 ret = 0;
1227 break;
1228 #endif
1229 case KVM_EXIT_EPR:
1230 DPRINTF("handle epr\n");
1231 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1232 ret = 0;
1233 break;
1234 case KVM_EXIT_WATCHDOG:
1235 DPRINTF("handle watchdog expiry\n");
1236 watchdog_perform_action();
1237 ret = 0;
1238 break;
1240 default:
1241 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1242 ret = -1;
1243 break;
1246 return ret;
1249 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1251 CPUState *cs = CPU(cpu);
1252 uint32_t bits = tsr_bits;
1253 struct kvm_one_reg reg = {
1254 .id = KVM_REG_PPC_OR_TSR,
1255 .addr = (uintptr_t) &bits,
1258 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1261 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1264 CPUState *cs = CPU(cpu);
1265 uint32_t bits = tsr_bits;
1266 struct kvm_one_reg reg = {
1267 .id = KVM_REG_PPC_CLEAR_TSR,
1268 .addr = (uintptr_t) &bits,
1271 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1274 int kvmppc_set_tcr(PowerPCCPU *cpu)
1276 CPUState *cs = CPU(cpu);
1277 CPUPPCState *env = &cpu->env;
1278 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1280 struct kvm_one_reg reg = {
1281 .id = KVM_REG_PPC_TCR,
1282 .addr = (uintptr_t) &tcr,
1285 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1288 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1290 CPUState *cs = CPU(cpu);
1291 int ret;
1293 if (!kvm_enabled()) {
1294 return -1;
1297 if (!cap_ppc_watchdog) {
1298 printf("warning: KVM does not support watchdog");
1299 return -1;
1302 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1303 if (ret < 0) {
1304 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1305 __func__, strerror(-ret));
1306 return ret;
1309 return ret;
/* Scan /proc/cpuinfo for a line starting with @field and copy it (at most
 * @len bytes, NUL-terminated) into @value.  Returns 0 on a hit, -1 otherwise. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);
    return ret;
}
/* Return the host timebase frequency from /proc/cpuinfo, falling back to
 * get_ticks_per_sec() when the "timebase" line is absent or malformed. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    /* Skip the ':' separator; atoi ignores the leading whitespace */
    ns++;

    retval = atoi(ns);
    return retval;
}
1360 /* Try to find a device tree node for a CPU with clock-frequency property */
1361 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1363 struct dirent *dirp;
1364 DIR *dp;
1366 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1367 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1368 return -1;
1371 buf[0] = '\0';
1372 while ((dirp = readdir(dp)) != NULL) {
1373 FILE *f;
1374 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1375 dirp->d_name);
1376 f = fopen(buf, "r");
1377 if (f) {
1378 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1379 fclose(f);
1380 break;
1382 buf[0] = '\0';
1384 closedir(dp);
1385 if (buf[0] == '\0') {
1386 printf("Unknown host!\n");
1387 return -1;
1390 return 0;
1393 /* Read a CPU node property from the host device tree that's a single
1394 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1395 * (can't find or open the property, or doesn't understand the
1396 * format) */
1397 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1399 char buf[PATH_MAX];
1400 union {
1401 uint32_t v32;
1402 uint64_t v64;
1403 } u;
1404 FILE *f;
1405 int len;
1407 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1408 return -1;
1411 strncat(buf, "/", sizeof(buf) - strlen(buf));
1412 strncat(buf, propname, sizeof(buf) - strlen(buf));
1414 f = fopen(buf, "rb");
1415 if (!f) {
1416 return -1;
1419 len = fread(&u, 1, sizeof(u), f);
1420 fclose(f);
1421 switch (len) {
1422 case 4:
1423 /* property is a 32-bit quantity */
1424 return be32_to_cpu(u.v32);
1425 case 8:
1426 return be64_to_cpu(u.v64);
1429 return 0;
/* Host CPU clock frequency from the device tree (0 on failure). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* "ibm,vmx" device-tree property: Altivec/VSX level reported by the host. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* "ibm,dfp" device-tree property: decimal FP support reported by the host. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1447 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1449 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1450 CPUState *cs = CPU(cpu);
1452 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1453 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1454 return 0;
1457 return 1;
1460 int kvmppc_get_hasidle(CPUPPCState *env)
1462 struct kvm_ppc_pvinfo pvinfo;
1464 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1465 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1466 return 1;
1469 return 0;
1472 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1474 uint32_t *hc = (uint32_t*)buf;
1475 struct kvm_ppc_pvinfo pvinfo;
1477 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1478 memcpy(buf, pvinfo.hcall, buf_len);
1479 return 0;
1483 * Fallback to always fail hypercalls:
1485 * li r3, -1
1486 * nop
1487 * nop
1488 * nop
1491 hc[0] = 0x3860ffff;
1492 hc[1] = 0x60000000;
1493 hc[2] = 0x60000000;
1494 hc[3] = 0x60000000;
1496 return 0;
1499 void kvmppc_set_papr(PowerPCCPU *cpu)
1501 CPUState *cs = CPU(cpu);
1502 int ret;
1504 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1505 if (ret) {
1506 cpu_abort(cs, "This KVM version does not support PAPR\n");
1509 /* Update the capability flag so we sync the right information
1510 * with kvm */
1511 cap_papr = 1;
1514 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1516 CPUState *cs = CPU(cpu);
1517 int ret;
1519 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1520 if (ret && mpic_proxy) {
1521 cpu_abort(cs, "This KVM version does not support EPR\n");
1525 int kvmppc_smt_threads(void)
1527 return cap_ppc_smt ? cap_ppc_smt : 1;
1530 #ifdef TARGET_PPC64
1531 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1533 void *rma;
1534 off_t size;
1535 int fd;
1536 struct kvm_allocate_rma ret;
1537 MemoryRegion *rma_region;
1539 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1540 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1541 * not necessary on this hardware
1542 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1544 * FIXME: We should allow the user to force contiguous RMA
1545 * allocation in the cap_ppc_rma==1 case.
1547 if (cap_ppc_rma < 2) {
1548 return 0;
1551 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1552 if (fd < 0) {
1553 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1554 strerror(errno));
1555 return -1;
1558 size = MIN(ret.rma_size, 256ul << 20);
1560 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1561 if (rma == MAP_FAILED) {
1562 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1563 return -1;
1566 rma_region = g_new(MemoryRegion, 1);
1567 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1568 vmstate_register_ram_global(rma_region);
1569 memory_region_add_subregion(sysmem, 0, rma_region);
1571 return size;
1574 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1576 struct kvm_ppc_smmu_info info;
1577 long rampagesize, best_page_shift;
1578 int i;
1580 if (cap_ppc_rma >= 2) {
1581 return current_size;
1584 /* Find the largest hardware supported page size that's less than
1585 * or equal to the (logical) backing page size of guest RAM */
1586 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1587 rampagesize = getrampagesize();
1588 best_page_shift = 0;
1590 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1591 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1593 if (!sps->page_shift) {
1594 continue;
1597 if ((sps->page_shift > best_page_shift)
1598 && ((1UL << sps->page_shift) <= rampagesize)) {
1599 best_page_shift = sps->page_shift;
1603 return MIN(current_size,
1604 1ULL << (best_page_shift + hash_shift - 7));
1606 #endif
1608 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1610 struct kvm_create_spapr_tce args = {
1611 .liobn = liobn,
1612 .window_size = window_size,
1614 long len;
1615 int fd;
1616 void *table;
1618 /* Must set fd to -1 so we don't try to munmap when called for
1619 * destroying the table, which the upper layers -will- do
1621 *pfd = -1;
1622 if (!cap_spapr_tce) {
1623 return NULL;
1626 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1627 if (fd < 0) {
1628 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1629 liobn);
1630 return NULL;
1633 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1634 /* FIXME: round this up to page size */
1636 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1637 if (table == MAP_FAILED) {
1638 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1639 liobn);
1640 close(fd);
1641 return NULL;
1644 *pfd = fd;
1645 return table;
1648 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1650 long len;
1652 if (fd < 0) {
1653 return -1;
1656 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1657 if ((munmap(table, len) < 0) ||
1658 (close(fd) < 0)) {
1659 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1660 strerror(errno));
1661 /* Leak the table */
1664 return 0;
1667 int kvmppc_reset_htab(int shift_hint)
1669 uint32_t shift = shift_hint;
1671 if (!kvm_enabled()) {
1672 /* Full emulation, tell caller to allocate htab itself */
1673 return 0;
1675 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1676 int ret;
1677 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1678 if (ret == -ENOTTY) {
1679 /* At least some versions of PR KVM advertise the
1680 * capability, but don't implement the ioctl(). Oops.
1681 * Return 0 so that we allocate the htab in qemu, as is
1682 * correct for PR. */
1683 return 0;
1684 } else if (ret < 0) {
1685 return ret;
1687 return shift;
1690 /* We have a kernel that predates the htab reset calls. For PR
1691 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1692 * this era, it has allocated a 16MB fixed size hash table
1693 * already. Kernels of this era have the GET_PVINFO capability
1694 * only on PR, so we use this hack to determine the right
1695 * answer */
1696 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1697 /* PR - tell caller to allocate htab */
1698 return 0;
1699 } else {
1700 /* HV - assume 16MB kernel allocated htab */
1701 return 24;
1705 static inline uint32_t mfpvr(void)
1707 uint32_t pvr;
1709 asm ("mfpvr %0"
1710 : "=r"(pvr));
1711 return pvr;
/* Set (@on) or clear (!@on) @flags in the instruction-flags word @word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1723 static void kvmppc_host_cpu_initfn(Object *obj)
1725 assert(kvm_enabled());
1728 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1730 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1731 uint32_t vmx = kvmppc_get_vmx();
1732 uint32_t dfp = kvmppc_get_dfp();
1733 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1734 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1736 /* Now fix up the class with information we can query from the host */
1737 pcc->pvr = mfpvr();
1739 if (vmx != -1) {
1740 /* Only override when we know what the host supports */
1741 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1742 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1744 if (dfp != -1) {
1745 /* Only override when we know what the host supports */
1746 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1749 if (dcache_size != -1) {
1750 pcc->l1_dcache_size = dcache_size;
1753 if (icache_size != -1) {
1754 pcc->l1_icache_size = icache_size;
1758 bool kvmppc_has_cap_epr(void)
1760 return cap_epr;
1763 bool kvmppc_has_cap_htab_fd(void)
1765 return cap_htab_fd;
1768 static int kvm_ppc_register_host_cpu_type(void)
1770 TypeInfo type_info = {
1771 .name = TYPE_HOST_POWERPC_CPU,
1772 .instance_init = kvmppc_host_cpu_initfn,
1773 .class_init = kvmppc_host_cpu_class_init,
1775 uint32_t host_pvr = mfpvr();
1776 PowerPCCPUClass *pvr_pcc;
1778 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1779 if (pvr_pcc == NULL) {
1780 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1782 if (pvr_pcc == NULL) {
1783 return -1;
1785 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1786 type_register(&type_info);
1787 return 0;
1790 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1792 struct kvm_rtas_token_args args = {
1793 .token = token,
1796 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1797 return -ENOENT;
1800 strncpy(args.name, function, sizeof(args.name));
1802 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1805 int kvmppc_get_htab_fd(bool write)
1807 struct kvm_get_htab_fd s = {
1808 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1809 .start_index = 0,
1812 if (!cap_htab_fd) {
1813 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1814 return -1;
1817 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1820 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1822 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1823 uint8_t buf[bufsize];
1824 ssize_t rc;
1826 do {
1827 rc = read(fd, buf, bufsize);
1828 if (rc < 0) {
1829 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1830 strerror(errno));
1831 return rc;
1832 } else if (rc) {
1833 /* Kernel already retuns data in BE format for the file */
1834 qemu_put_buffer(f, buf, rc);
1836 } while ((rc != 0)
1837 && ((max_ns < 0)
1838 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1840 return (rc == 0) ? 1 : 0;
1843 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1844 uint16_t n_valid, uint16_t n_invalid)
1846 struct kvm_get_htab_header *buf;
1847 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1848 ssize_t rc;
1850 buf = alloca(chunksize);
1851 /* This is KVM on ppc, so this is all big-endian */
1852 buf->index = index;
1853 buf->n_valid = n_valid;
1854 buf->n_invalid = n_invalid;
1856 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1858 rc = write(fd, buf, chunksize);
1859 if (rc < 0) {
1860 fprintf(stderr, "Error writing KVM hash table: %s\n",
1861 strerror(errno));
1862 return rc;
1864 if (rc != chunksize) {
1865 /* We should never get a short write on a single chunk */
1866 fprintf(stderr, "Short write, restoring KVM hash table\n");
1867 return -1;
1869 return 0;
1872 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1874 return true;
1877 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1879 return 1;
/* Process-wide SIGBUS is not handled on ppc; report "unhandled". */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
1887 void kvm_arch_init_irq_routing(KVMState *s)
1891 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1893 return -EINVAL;
1896 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1898 return -EINVAL;
1901 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1903 return -EINVAL;
1906 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1908 return -EINVAL;
/* Nothing to clear: hardware breakpoints are unsupported on ppc. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
}
1915 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1919 struct kvm_get_htab_buf {
1920 struct kvm_get_htab_header header;
1922 * We require one extra byte for read
1924 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
1927 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1929 int htab_fd;
1930 struct kvm_get_htab_fd ghf;
1931 struct kvm_get_htab_buf *hpte_buf;
1933 ghf.flags = 0;
1934 ghf.start_index = pte_index;
1935 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1936 if (htab_fd < 0) {
1937 goto error_out;
1940 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1942 * Read the hpte group
1944 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1945 goto out_close;
1948 close(htab_fd);
1949 return (uint64_t)(uintptr_t) hpte_buf->hpte;
1951 out_close:
1952 g_free(hpte_buf);
1953 close(htab_fd);
1954 error_out:
1955 return 0;
1958 void kvmppc_hash64_free_pteg(uint64_t token)
1960 struct kvm_get_htab_buf *htab_buf;
1962 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1963 hpte);
1964 g_free(htab_buf);
1965 return;
1968 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1969 target_ulong pte0, target_ulong pte1)
1971 int htab_fd;
1972 struct kvm_get_htab_fd ghf;
1973 struct kvm_get_htab_buf hpte_buf;
1975 ghf.flags = 0;
1976 ghf.start_index = 0; /* Ignored */
1977 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1978 if (htab_fd < 0) {
1979 goto error_out;
1982 hpte_buf.header.n_valid = 1;
1983 hpte_buf.header.n_invalid = 0;
1984 hpte_buf.header.index = pte_index;
1985 hpte_buf.hpte[0] = pte0;
1986 hpte_buf.hpte[1] = pte1;
1988 * Write the hpte entry.
1989 * CAUTION: write() has the warn_unused_result attribute. Hence we
1990 * need to check the return value, even though we do nothing.
1992 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
1993 goto out_close;
1996 out_close:
1997 close(htab_fd);
1998 return;
2000 error_out:
2001 return;