[qemu.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
39 #include "trace.h"
41 //#define DEBUG_KVM
43 #ifdef DEBUG_KVM
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #else
47 #define DPRINTF(fmt, ...) \
48 do { } while (0)
49 #endif
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
54 KVM_CAP_LAST_INFO
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
64 static int cap_hior;
65 static int cap_one_reg;
66 static int cap_epr;
67 static int cap_ppc_watchdog;
68 static int cap_papr;
69 static int cap_htab_fd;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can ensure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
80 static QEMUTimer *idle_timer;
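/* Timer callback: kick the vCPU so a possibly swallowed level-triggered
 * interrupt gets another chance to be reinjected (see the note above). */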
82 static void kvm_kick_cpu(void *opaque)
84 PowerPCCPU *cpu = opaque;
86 qemu_cpu_kick(CPU(cpu));
89 static int kvm_ppc_register_host_cpu_type(void);
91 int kvm_arch_init(KVMState *s)
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
108 if (!cap_interrupt_level) {
109 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
115 return 0;
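/* Tell KVM which CPU model we emulate by pushing the guest PVR through the
 * sregs interface (a no-op on BookE, which keeps the native PVR for now). */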
118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
120 CPUPPCState *cenv = &cpu->env;
121 CPUState *cs = CPU(cpu);
122 struct kvm_sregs sregs;
123 int ret;
125 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users into thinking they can run
129 BookE guests on BookS. Let's hope nobody dares enough :) */
130 return 0;
131 } else {
132 if (!cap_segstate) {
133 fprintf(stderr, "kvm error: missing PVR setting capability\n");
134 return -ENOSYS;
138 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
139 if (ret) {
140 return ret;
143 sregs.pvr = cenv->spr[SPR_PVR];
144 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
147 /* Set up a shared TLB array with KVM */
148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
150 CPUPPCState *env = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_book3e_206_tlb_params params = {};
153 struct kvm_config_tlb cfg = {};
154 unsigned int entries = 0;
155 int ret, i;
157 if (!kvm_enabled() ||
158 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
159 return 0;
162 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
164 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
165 params.tlb_sizes[i] = booke206_tlb_size(env, i);
166 params.tlb_ways[i] = booke206_tlb_ways(env, i);
167 entries += params.tlb_sizes[i];
170 assert(entries == env->nb_tlb);
171 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
173 env->tlb_dirty = true;
175 cfg.array = (uintptr_t)env->tlb.tlbm;
176 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
177 cfg.params = (uintptr_t)&params;
178 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
180 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
181 if (ret < 0) {
182 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
183 __func__, strerror(-ret));
184 return ret;
187 env->kvm_sw_tlb = true;
188 return 0;
192 #if defined(TARGET_PPC64)
193 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
194 struct kvm_ppc_smmu_info *info)
196 CPUPPCState *env = &cpu->env;
197 CPUState *cs = CPU(cpu);
199 memset(info, 0, sizeof(*info));
201 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
202 * we need to "guess" what the supported page sizes are.
204 * For that to work we make a few assumptions:
206 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
207 * KVM which only supports 4K and 16M pages, but supports them
208 * regardless of the backing store characteristics. We also don't
209 * support 1T segments.
211 * This is safe because if HV KVM ever supports that capability or PR
212 * KVM grows support for more page/segment sizes, those versions
213 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
214 * will not hit this fallback
216 * - Else we are running HV KVM. This means we only support page
217 * sizes that fit in the backing store. Additionally we only
218 * advertise 64K pages if the processor is ARCH 2.06 and we assume
219 * P7 encodings for the SLB and hash table. Here too, we assume
220 * support for any newer processor will mean a kernel that
221 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
222 * this fallback.
224 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
225 /* No flags */
226 info->flags = 0;
227 info->slb_size = 64;
229 /* Standard 4k base page size segment */
230 info->sps[0].page_shift = 12;
231 info->sps[0].slb_enc = 0;
232 info->sps[0].enc[0].page_shift = 12;
233 info->sps[0].enc[0].pte_enc = 0;
235 /* Standard 16M large page size segment */
236 info->sps[1].page_shift = 24;
237 info->sps[1].slb_enc = SLB_VSID_L;
238 info->sps[1].enc[0].page_shift = 24;
239 info->sps[1].enc[0].pte_enc = 0;
240 } else {
241 int i = 0;
243 /* HV KVM has backing store size restrictions */
244 info->flags = KVM_PPC_PAGE_SIZES_REAL;
246 if (env->mmu_model & POWERPC_MMU_1TSEG) {
247 info->flags |= KVM_PPC_1T_SEGMENTS;
250 if (env->mmu_model == POWERPC_MMU_2_06) {
251 info->slb_size = 32;
252 } else {
253 info->slb_size = 64;
256 /* Standard 4k base page size segment */
257 info->sps[i].page_shift = 12;
258 info->sps[i].slb_enc = 0;
259 info->sps[i].enc[0].page_shift = 12;
260 info->sps[i].enc[0].pte_enc = 0;
261 i++;
263 /* 64K on MMU 2.06 */
264 if (env->mmu_model == POWERPC_MMU_2_06) {
265 info->sps[i].page_shift = 16;
266 info->sps[i].slb_enc = 0x110;
267 info->sps[i].enc[0].page_shift = 16;
268 info->sps[i].enc[0].pte_enc = 1;
269 i++;
272 /* Standard 16M large page size segment */
273 info->sps[i].page_shift = 24;
274 info->sps[i].slb_enc = SLB_VSID_L;
275 info->sps[i].enc[0].page_shift = 24;
276 info->sps[i].enc[0].pte_enc = 0;
280 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
282 CPUState *cs = CPU(cpu);
283 int ret;
285 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
286 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
287 if (ret == 0) {
288 return;
292 kvm_get_fallback_smmu_info(cpu, info);
295 static long getrampagesize(void)
297 struct statfs fs;
298 int ret;
300 if (!mem_path) {
301 /* guest RAM is backed by normal anonymous pages */
302 return getpagesize();
305 do {
306 ret = statfs(mem_path, &fs);
307 } while (ret != 0 && errno == EINTR);
309 if (ret != 0) {
310 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
311 strerror(errno));
312 exit(1);
315 #define HUGETLBFS_MAGIC 0x958458f6
317 if (fs.f_type != HUGETLBFS_MAGIC) {
318 /* Explicit mempath, but it's ordinary pages */
319 return getpagesize();
322 /* It's hugetlbfs, so return the huge page size */
323 return fs.f_bsize;
326 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
328 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
329 return true;
332 return (1ul << shift) <= rampgsize;
335 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
337 static struct kvm_ppc_smmu_info smmu_info;
338 static bool has_smmu_info;
339 CPUPPCState *env = &cpu->env;
340 long rampagesize;
341 int iq, ik, jq, jk;
343 /* We only handle page sizes for 64-bit server guests for now */
344 if (!(env->mmu_model & POWERPC_MMU_64)) {
345 return;
348 /* Collect MMU info from kernel if not already */
349 if (!has_smmu_info) {
350 kvm_get_smmu_info(cpu, &smmu_info);
351 has_smmu_info = true;
354 rampagesize = getrampagesize();
356 /* Convert to QEMU form */
357 memset(&env->sps, 0, sizeof(env->sps));
359 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
360 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
361 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
363 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
364 ksps->page_shift)) {
365 continue;
367 qsps->page_shift = ksps->page_shift;
368 qsps->slb_enc = ksps->slb_enc;
369 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
370 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
371 ksps->enc[jk].page_shift)) {
372 continue;
374 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
375 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
376 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
377 break;
380 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
381 break;
384 env->slb_nr = smmu_info.slb_size;
385 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
386 env->mmu_model |= POWERPC_MMU_1TSEG;
387 } else {
388 env->mmu_model &= ~POWERPC_MMU_1TSEG;
391 #else /* defined (TARGET_PPC64) */
393 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
397 #endif /* !defined (TARGET_PPC64) */
399 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
401 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
404 int kvm_arch_init_vcpu(CPUState *cs)
406 PowerPCCPU *cpu = POWERPC_CPU(cs);
407 CPUPPCState *cenv = &cpu->env;
408 int ret;
410 /* Gather server mmu info from KVM and update the CPU state */
411 kvm_fixup_page_sizes(cpu);
413 /* Synchronize sregs with kvm */
414 ret = kvm_arch_sync_sregs(cpu);
415 if (ret) {
416 return ret;
419 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
421 /* Some targets support access to KVM's guest TLB. */
422 switch (cenv->mmu_model) {
423 case POWERPC_MMU_BOOKE206:
424 ret = kvm_booke206_tlb_init(cpu);
425 break;
426 default:
427 break;
430 return ret;
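/* Push QEMU's shadow copy of the guest TLB back into KVM, marking every
 * entry dirty so the kernel reloads the whole array. */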
433 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
435 CPUPPCState *env = &cpu->env;
436 CPUState *cs = CPU(cpu);
437 struct kvm_dirty_tlb dirty_tlb;
438 unsigned char *bitmap;
439 int ret;
441 if (!env->kvm_sw_tlb) {
442 return;
445 bitmap = g_malloc((env->nb_tlb + 7) / 8);
446 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
448 dirty_tlb.bitmap = (uintptr_t)bitmap;
449 dirty_tlb.num_dirty = env->nb_tlb;
451 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
452 if (ret) {
453 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
454 __func__, strerror(-ret));
457 g_free(bitmap);
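/* Read a single SPR from KVM through the ONE_REG interface into env->spr[]. */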
460 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
462 PowerPCCPU *cpu = POWERPC_CPU(cs);
463 CPUPPCState *env = &cpu->env;
464 union {
465 uint32_t u32;
466 uint64_t u64;
467 } val;
468 struct kvm_one_reg reg = {
469 .id = id,
470 .addr = (uintptr_t) &val,
472 int ret;
474 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
475 if (ret != 0) {
476 trace_kvm_failed_spr_get(spr, strerror(errno));
477 } else {
478 switch (id & KVM_REG_SIZE_MASK) {
479 case KVM_REG_SIZE_U32:
480 env->spr[spr] = val.u32;
481 break;
483 case KVM_REG_SIZE_U64:
484 env->spr[spr] = val.u64;
485 break;
487 default:
488 /* Don't handle this size yet */
489 abort();
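/* Write a single SPR from env->spr[] to KVM through the ONE_REG interface. */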
494 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
496 PowerPCCPU *cpu = POWERPC_CPU(cs);
497 CPUPPCState *env = &cpu->env;
498 union {
499 uint32_t u32;
500 uint64_t u64;
501 } val;
502 struct kvm_one_reg reg = {
503 .id = id,
504 .addr = (uintptr_t) &val,
506 int ret;
508 switch (id & KVM_REG_SIZE_MASK) {
509 case KVM_REG_SIZE_U32:
510 val.u32 = env->spr[spr];
511 break;
513 case KVM_REG_SIZE_U64:
514 val.u64 = env->spr[spr];
515 break;
517 default:
518 /* Don't handle this size yet */
519 abort();
522 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
523 if (ret != 0) {
524 trace_kvm_failed_spr_set(spr, strerror(errno));
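/* Push floating point/VSX state (FPSCR plus the FPR/VSR array) and Altivec
 * state (VSCR plus the VR array) to KVM, one ONE_REG call per register. */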
528 static int kvm_put_fp(CPUState *cs)
530 PowerPCCPU *cpu = POWERPC_CPU(cs);
531 CPUPPCState *env = &cpu->env;
532 struct kvm_one_reg reg;
533 int i;
534 int ret;
536 if (env->insns_flags & PPC_FLOAT) {
537 uint64_t fpscr = env->fpscr;
538 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
540 reg.id = KVM_REG_PPC_FPSCR;
541 reg.addr = (uintptr_t)&fpscr;
542 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
543 if (ret < 0) {
544 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
545 return ret;
548 for (i = 0; i < 32; i++) {
549 uint64_t vsr[2];
551 vsr[0] = float64_val(env->fpr[i]);
552 vsr[1] = env->vsr[i];
553 reg.addr = (uintptr_t) &vsr;
554 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
556 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
557 if (ret < 0) {
558 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
559 i, strerror(errno));
560 return ret;
565 if (env->insns_flags & PPC_ALTIVEC) {
566 reg.id = KVM_REG_PPC_VSCR;
567 reg.addr = (uintptr_t)&env->vscr;
568 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
569 if (ret < 0) {
570 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
571 return ret;
574 for (i = 0; i < 32; i++) {
575 reg.id = KVM_REG_PPC_VR(i);
576 reg.addr = (uintptr_t)&env->avr[i];
577 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 if (ret < 0) {
579 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
580 return ret;
585 return 0;
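/* Fetch the same floating point/VSX and Altivec state back from KVM; this is
 * the mirror of kvm_put_fp() above. */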
588 static int kvm_get_fp(CPUState *cs)
590 PowerPCCPU *cpu = POWERPC_CPU(cs);
591 CPUPPCState *env = &cpu->env;
592 struct kvm_one_reg reg;
593 int i;
594 int ret;
596 if (env->insns_flags & PPC_FLOAT) {
597 uint64_t fpscr;
598 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
600 reg.id = KVM_REG_PPC_FPSCR;
601 reg.addr = (uintptr_t)&fpscr;
602 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
603 if (ret < 0) {
604 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
605 return ret;
606 } else {
607 env->fpscr = fpscr;
610 for (i = 0; i < 32; i++) {
611 uint64_t vsr[2];
613 reg.addr = (uintptr_t) &vsr;
614 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
616 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
617 if (ret < 0) {
618 DPRINTF("Unable to get %s%d from KVM: %s\n",
619 vsx ? "VSR" : "FPR", i, strerror(errno));
620 return ret;
621 } else {
622 env->fpr[i] = vsr[0];
623 if (vsx) {
624 env->vsr[i] = vsr[1];
630 if (env->insns_flags & PPC_ALTIVEC) {
631 reg.id = KVM_REG_PPC_VSCR;
632 reg.addr = (uintptr_t)&env->vscr;
633 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
634 if (ret < 0) {
635 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
636 return ret;
639 for (i = 0; i < 32; i++) {
640 reg.id = KVM_REG_PPC_VR(i);
641 reg.addr = (uintptr_t)&env->avr[i];
642 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 if (ret < 0) {
644 DPRINTF("Unable to get VR%d from KVM: %s\n",
645 i, strerror(errno));
646 return ret;
651 return 0;
654 #if defined(TARGET_PPC64)
655 static int kvm_get_vpa(CPUState *cs)
657 PowerPCCPU *cpu = POWERPC_CPU(cs);
658 CPUPPCState *env = &cpu->env;
659 struct kvm_one_reg reg;
660 int ret;
662 reg.id = KVM_REG_PPC_VPA_ADDR;
663 reg.addr = (uintptr_t)&env->vpa_addr;
664 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
665 if (ret < 0) {
666 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
667 return ret;
670 assert((uintptr_t)&env->slb_shadow_size
671 == ((uintptr_t)&env->slb_shadow_addr + 8));
672 reg.id = KVM_REG_PPC_VPA_SLB;
673 reg.addr = (uintptr_t)&env->slb_shadow_addr;
674 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
675 if (ret < 0) {
676 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
677 strerror(errno));
678 return ret;
681 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
682 reg.id = KVM_REG_PPC_VPA_DTL;
683 reg.addr = (uintptr_t)&env->dtl_addr;
684 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
685 if (ret < 0) {
686 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
687 strerror(errno));
688 return ret;
691 return 0;
694 static int kvm_put_vpa(CPUState *cs)
696 PowerPCCPU *cpu = POWERPC_CPU(cs);
697 CPUPPCState *env = &cpu->env;
698 struct kvm_one_reg reg;
699 int ret;
701 /* SLB shadow or DTL can't be registered unless a master VPA is
702 * registered. That means when restoring state, if a VPA *is*
703 * registered, we need to set that up first. If not, we need to
704 * deregister the others before deregistering the master VPA */
705 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
707 if (env->vpa_addr) {
708 reg.id = KVM_REG_PPC_VPA_ADDR;
709 reg.addr = (uintptr_t)&env->vpa_addr;
710 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
711 if (ret < 0) {
712 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
713 return ret;
717 assert((uintptr_t)&env->slb_shadow_size
718 == ((uintptr_t)&env->slb_shadow_addr + 8));
719 reg.id = KVM_REG_PPC_VPA_SLB;
720 reg.addr = (uintptr_t)&env->slb_shadow_addr;
721 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
722 if (ret < 0) {
723 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
724 return ret;
727 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
728 reg.id = KVM_REG_PPC_VPA_DTL;
729 reg.addr = (uintptr_t)&env->dtl_addr;
730 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
731 if (ret < 0) {
732 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
733 strerror(errno));
734 return ret;
737 if (!env->vpa_addr) {
738 reg.id = KVM_REG_PPC_VPA_ADDR;
739 reg.addr = (uintptr_t)&env->vpa_addr;
740 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
741 if (ret < 0) {
742 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
743 return ret;
747 return 0;
749 #endif /* TARGET_PPC64 */
751 int kvm_arch_put_registers(CPUState *cs, int level)
753 PowerPCCPU *cpu = POWERPC_CPU(cs);
754 CPUPPCState *env = &cpu->env;
755 struct kvm_regs regs;
756 int ret;
757 int i;
759 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
760 if (ret < 0) {
761 return ret;
764 regs.ctr = env->ctr;
765 regs.lr = env->lr;
766 regs.xer = cpu_read_xer(env);
767 regs.msr = env->msr;
768 regs.pc = env->nip;
770 regs.srr0 = env->spr[SPR_SRR0];
771 regs.srr1 = env->spr[SPR_SRR1];
773 regs.sprg0 = env->spr[SPR_SPRG0];
774 regs.sprg1 = env->spr[SPR_SPRG1];
775 regs.sprg2 = env->spr[SPR_SPRG2];
776 regs.sprg3 = env->spr[SPR_SPRG3];
777 regs.sprg4 = env->spr[SPR_SPRG4];
778 regs.sprg5 = env->spr[SPR_SPRG5];
779 regs.sprg6 = env->spr[SPR_SPRG6];
780 regs.sprg7 = env->spr[SPR_SPRG7];
782 regs.pid = env->spr[SPR_BOOKE_PID];
784 for (i = 0;i < 32; i++)
785 regs.gpr[i] = env->gpr[i];
787 regs.cr = 0;
788 for (i = 0; i < 8; i++) {
789 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
792 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
793 if (ret < 0)
794 return ret;
796 kvm_put_fp(cs);
798 if (env->tlb_dirty) {
799 kvm_sw_tlb_put(cpu);
800 env->tlb_dirty = false;
803 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
804 struct kvm_sregs sregs;
806 sregs.pvr = env->spr[SPR_PVR];
808 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
810 /* Sync SLB */
811 #ifdef TARGET_PPC64
812 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
813 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
814 if (env->slb[i].esid & SLB_ESID_V) {
815 sregs.u.s.ppc64.slb[i].slbe |= i;
817 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
819 #endif
821 /* Sync SRs */
822 for (i = 0; i < 16; i++) {
823 sregs.u.s.ppc32.sr[i] = env->sr[i];
826 /* Sync BATs */
827 for (i = 0; i < 8; i++) {
828 /* Beware: we have to swap the upper and lower halves here */
829 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
830 | env->DBAT[1][i];
831 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
832 | env->IBAT[1][i];
835 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
836 if (ret) {
837 return ret;
841 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
842 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
845 if (cap_one_reg) {
846 int i;
848 /* We deliberately ignore errors here; for kernels which have
849 * the ONE_REG calls but don't support the specific
850 * registers, there's a reasonable chance things will still
851 * work, at least until we try to migrate. */
852 for (i = 0; i < 1024; i++) {
853 uint64_t id = env->spr_cb[i].one_reg_id;
855 if (id != 0) {
856 kvm_put_one_spr(cs, id, i);
860 #ifdef TARGET_PPC64
861 if (cap_papr) {
862 if (kvm_put_vpa(cs) < 0) {
863 DPRINTF("Warning: Unable to set VPA information to KVM\n");
866 #endif /* TARGET_PPC64 */
869 return ret;
872 int kvm_arch_get_registers(CPUState *cs)
874 PowerPCCPU *cpu = POWERPC_CPU(cs);
875 CPUPPCState *env = &cpu->env;
876 struct kvm_regs regs;
877 struct kvm_sregs sregs;
878 uint32_t cr;
879 int i, ret;
881 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
882 if (ret < 0)
883 return ret;
885 cr = regs.cr;
886 for (i = 7; i >= 0; i--) {
887 env->crf[i] = cr & 15;
888 cr >>= 4;
891 env->ctr = regs.ctr;
892 env->lr = regs.lr;
893 cpu_write_xer(env, regs.xer);
894 env->msr = regs.msr;
895 env->nip = regs.pc;
897 env->spr[SPR_SRR0] = regs.srr0;
898 env->spr[SPR_SRR1] = regs.srr1;
900 env->spr[SPR_SPRG0] = regs.sprg0;
901 env->spr[SPR_SPRG1] = regs.sprg1;
902 env->spr[SPR_SPRG2] = regs.sprg2;
903 env->spr[SPR_SPRG3] = regs.sprg3;
904 env->spr[SPR_SPRG4] = regs.sprg4;
905 env->spr[SPR_SPRG5] = regs.sprg5;
906 env->spr[SPR_SPRG6] = regs.sprg6;
907 env->spr[SPR_SPRG7] = regs.sprg7;
909 env->spr[SPR_BOOKE_PID] = regs.pid;
911 for (i = 0;i < 32; i++)
912 env->gpr[i] = regs.gpr[i];
914 kvm_get_fp(cs);
916 if (cap_booke_sregs) {
917 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
918 if (ret < 0) {
919 return ret;
922 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
923 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
924 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
925 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
926 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
927 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
928 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
929 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
930 env->spr[SPR_DECR] = sregs.u.e.dec;
931 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
932 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
933 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
936 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
937 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
938 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
939 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
940 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
941 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
944 if (sregs.u.e.features & KVM_SREGS_E_64) {
945 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
948 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
949 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
952 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
953 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
954 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
955 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
956 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
957 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
958 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
959 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
960 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
961 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
962 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
963 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
964 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
965 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
966 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
967 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
968 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
970 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
971 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
972 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
973 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
976 if (sregs.u.e.features & KVM_SREGS_E_PM) {
977 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
980 if (sregs.u.e.features & KVM_SREGS_E_PC) {
981 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
982 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
986 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
987 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
988 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
989 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
990 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
991 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
992 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
993 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
994 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
995 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
996 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
999 if (sregs.u.e.features & KVM_SREGS_EXP) {
1000 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1003 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1004 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1005 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1008 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1009 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1010 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1011 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1013 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1014 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1015 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1020 if (cap_segstate) {
1021 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1022 if (ret < 0) {
1023 return ret;
1026 if (!env->external_htab) {
1027 ppc_store_sdr1(env, sregs.u.s.sdr1);
1030 /* Sync SLB */
1031 #ifdef TARGET_PPC64
1033 * The packed SLB array we get from KVM_GET_SREGS only contains
1034 * information about valid entries. So we flush our internal
1035 * copy to get rid of stale ones, then put all valid SLB entries
1036 * back in.
1038 memset(env->slb, 0, sizeof(env->slb));
1039 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1040 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1041 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1043 * Only restore valid entries
1045 if (rb & SLB_ESID_V) {
1046 ppc_store_slb(env, rb, rs);
1049 #endif
1051 /* Sync SRs */
1052 for (i = 0; i < 16; i++) {
1053 env->sr[i] = sregs.u.s.ppc32.sr[i];
1056 /* Sync BATs */
1057 for (i = 0; i < 8; i++) {
1058 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1059 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1060 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1061 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1065 if (cap_hior) {
1066 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1069 if (cap_one_reg) {
1070 int i;
1072 /* We deliberately ignore errors here; for kernels which have
1073 * the ONE_REG calls but don't support the specific
1074 * registers, there's a reasonable chance things will still
1075 * work, at least until we try to migrate. */
1076 for (i = 0; i < 1024; i++) {
1077 uint64_t id = env->spr_cb[i].one_reg_id;
1079 if (id != 0) {
1080 kvm_get_one_spr(cs, id, i);
1084 #ifdef TARGET_PPC64
1085 if (cap_papr) {
1086 if (kvm_get_vpa(cs) < 0) {
1087 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1090 #endif
1093 return 0;
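/* Assert or deassert the external interrupt line in KVM. Other interrupt
 * sources are ignored here, and the call only takes effect when KVM exposes
 * the level-interrupt and unset-interrupt capabilities. */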
1096 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1098 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1100 if (irq != PPC_INTERRUPT_EXT) {
1101 return 0;
1104 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1105 return 0;
1108 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1110 return 0;
1113 #if defined(TARGET_PPCEMB)
1114 #define PPC_INPUT_INT PPC40x_INPUT_INT
1115 #elif defined(TARGET_PPC64)
1116 #define PPC_INPUT_INT PPC970_INPUT_INT
1117 #else
1118 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1119 #endif
1121 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1123 PowerPCCPU *cpu = POWERPC_CPU(cs);
1124 CPUPPCState *env = &cpu->env;
1125 int r;
1126 unsigned irq;
1128 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1129 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1130 if (!cap_interrupt_level &&
1131 run->ready_for_interrupt_injection &&
1132 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1133 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1135 /* For now KVM disregards the 'irq' argument. However, in the
1136 * future KVM could cache it in-kernel to avoid a heavyweight exit
1137 * when reading the UIC.
1139 irq = KVM_INTERRUPT_SET;
1141 DPRINTF("injected interrupt %d\n", irq);
1142 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1143 if (r < 0) {
1144 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1147 /* Always wake up soon in case the interrupt was level based */
1148 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1149 (get_ticks_per_sec() / 50));
1152 /* We don't know if there are more interrupts pending after this. However,
1153 * the guest will return to userspace in the course of handling this one
1154 * anyways, so we will get a chance to deliver the rest. */
1157 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1161 int kvm_arch_process_async_events(CPUState *cs)
1163 return cs->halted;
1166 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1168 CPUState *cs = CPU(cpu);
1169 CPUPPCState *env = &cpu->env;
1171 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1172 cs->halted = 1;
1173 cs->exception_index = EXCP_HLT;
1176 return 0;
1179 /* map dcr access to existing qemu dcr emulation */
1180 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1182 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1183 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1185 return 0;
1188 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1190 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1191 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1193 return 0;
1196 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1198 PowerPCCPU *cpu = POWERPC_CPU(cs);
1199 CPUPPCState *env = &cpu->env;
1200 int ret;
1202 switch (run->exit_reason) {
1203 case KVM_EXIT_DCR:
1204 if (run->dcr.is_write) {
1205 DPRINTF("handle dcr write\n");
1206 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1207 } else {
1208 DPRINTF("handle dcr read\n");
1209 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1211 break;
1212 case KVM_EXIT_HLT:
1213 DPRINTF("handle halt\n");
1214 ret = kvmppc_handle_halt(cpu);
1215 break;
1216 #if defined(TARGET_PPC64)
1217 case KVM_EXIT_PAPR_HCALL:
1218 DPRINTF("handle PAPR hypercall\n");
1219 run->papr_hcall.ret = spapr_hypercall(cpu,
1220 run->papr_hcall.nr,
1221 run->papr_hcall.args);
1222 ret = 0;
1223 break;
1224 #endif
1225 case KVM_EXIT_EPR:
1226 DPRINTF("handle epr\n");
1227 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1228 ret = 0;
1229 break;
1230 case KVM_EXIT_WATCHDOG:
1231 DPRINTF("handle watchdog expiry\n");
1232 watchdog_perform_action();
1233 ret = 0;
1234 break;
1236 default:
1237 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1238 ret = -1;
1239 break;
1242 return ret;
1245 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1247 CPUState *cs = CPU(cpu);
1248 uint32_t bits = tsr_bits;
1249 struct kvm_one_reg reg = {
1250 .id = KVM_REG_PPC_OR_TSR,
1251 .addr = (uintptr_t) &bits,
1254 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1257 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1260 CPUState *cs = CPU(cpu);
1261 uint32_t bits = tsr_bits;
1262 struct kvm_one_reg reg = {
1263 .id = KVM_REG_PPC_CLEAR_TSR,
1264 .addr = (uintptr_t) &bits,
1267 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1270 int kvmppc_set_tcr(PowerPCCPU *cpu)
1272 CPUState *cs = CPU(cpu);
1273 CPUPPCState *env = &cpu->env;
1274 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1276 struct kvm_one_reg reg = {
1277 .id = KVM_REG_PPC_TCR,
1278 .addr = (uintptr_t) &tcr,
1281 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1284 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1286 CPUState *cs = CPU(cpu);
1287 int ret;
1289 if (!kvm_enabled()) {
1290 return -1;
1293 if (!cap_ppc_watchdog) {
1294 fprintf(stderr, "warning: KVM does not support watchdog\n");
1295 return -1;
1298 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1299 if (ret < 0) {
1300 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1301 __func__, strerror(-ret));
1302 return ret;
1305 return ret;
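/* Look up a field in /proc/cpuinfo and copy the matching line into value.
 * Returns 0 on success, -1 if the field was not found. */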
1308 static int read_cpuinfo(const char *field, char *value, int len)
1310 FILE *f;
1311 int ret = -1;
1312 int field_len = strlen(field);
1313 char line[512];
1315 f = fopen("/proc/cpuinfo", "r");
1316 if (!f) {
1317 return -1;
1320 do {
1321 if(!fgets(line, sizeof(line), f)) {
1322 break;
1324 if (!strncmp(line, field, field_len)) {
1325 pstrcpy(value, len, line);
1326 ret = 0;
1327 break;
1329 } while(*line);
1331 fclose(f);
1333 return ret;
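/* Parse the host timebase frequency out of /proc/cpuinfo, falling back to
 * get_ticks_per_sec() when it cannot be determined. */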
1336 uint32_t kvmppc_get_tbfreq(void)
1338 char line[512];
1339 char *ns;
1340 uint32_t retval = get_ticks_per_sec();
1342 if (read_cpuinfo("timebase", line, sizeof(line))) {
1343 return retval;
1346 if (!(ns = strchr(line, ':'))) {
1347 return retval;
1350 ns++;
1352 retval = atoi(ns);
1353 return retval;
1356 /* Try to find a device tree node for a CPU with clock-frequency property */
1357 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1359 struct dirent *dirp;
1360 DIR *dp;
1362 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1363 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1364 return -1;
1367 buf[0] = '\0';
1368 while ((dirp = readdir(dp)) != NULL) {
1369 FILE *f;
1370 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1371 dirp->d_name);
1372 f = fopen(buf, "r");
1373 if (f) {
1374 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1375 fclose(f);
1376 break;
1378 buf[0] = '\0';
1380 closedir(dp);
1381 if (buf[0] == '\0') {
1382 printf("Unknown host!\n");
1383 return -1;
1386 return 0;
1389 /* Read a CPU node property from the host device tree that's a single
1390 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1391 * (can't find or open the property, or doesn't understand the
1392 * format) */
1393 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1395 char buf[PATH_MAX];
1396 union {
1397 uint32_t v32;
1398 uint64_t v64;
1399 } u;
1400 FILE *f;
1401 int len;
1403 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1404 return -1;
1407 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1408 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1410 f = fopen(buf, "rb");
1411 if (!f) {
1412 return -1;
1415 len = fread(&u, 1, sizeof(u), f);
1416 fclose(f);
1417 switch (len) {
1418 case 4:
1419 /* property is a 32-bit quantity */
1420 return be32_to_cpu(u.v32);
1421 case 8:
1422 return be64_to_cpu(u.v64);
1425 return 0;
1428 uint64_t kvmppc_get_clockfreq(void)
1430 return kvmppc_read_int_cpu_dt("clock-frequency");
1433 uint32_t kvmppc_get_vmx(void)
1435 return kvmppc_read_int_cpu_dt("ibm,vmx");
1438 uint32_t kvmppc_get_dfp(void)
1440 return kvmppc_read_int_cpu_dt("ibm,dfp");
1443 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1445 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1446 CPUState *cs = CPU(cpu);
1448 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1449 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1450 return 0;
1453 return 1;
1456 int kvmppc_get_hasidle(CPUPPCState *env)
1458 struct kvm_ppc_pvinfo pvinfo;
1460 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1461 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1462 return 1;
1465 return 0;
1468 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1470 uint32_t *hc = (uint32_t*)buf;
1471 struct kvm_ppc_pvinfo pvinfo;
1473 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1474 memcpy(buf, pvinfo.hcall, buf_len);
1475 return 0;
1479 * Fallback to always fail hypercalls:
1481 * li r3, -1
1482 * nop
1483 * nop
1484 * nop
1487 hc[0] = 0x3860ffff;
1488 hc[1] = 0x60000000;
1489 hc[2] = 0x60000000;
1490 hc[3] = 0x60000000;
1492 return 0;
1495 void kvmppc_set_papr(PowerPCCPU *cpu)
1497 CPUState *cs = CPU(cpu);
1498 int ret;
1500 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1501 if (ret) {
1502 cpu_abort(cs, "This KVM version does not support PAPR\n");
1505 /* Update the capability flag so we sync the right information
1506 * with kvm */
1507 cap_papr = 1;
1510 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1512 CPUState *cs = CPU(cpu);
1513 int ret;
1515 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1516 if (ret && mpic_proxy) {
1517 cpu_abort(cs, "This KVM version does not support EPR\n");
1521 int kvmppc_smt_threads(void)
1523 return cap_ppc_smt ? cap_ppc_smt : 1;
1526 #ifdef TARGET_PPC64
1527 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1529 void *rma;
1530 off_t size;
1531 int fd;
1532 struct kvm_allocate_rma ret;
1533 MemoryRegion *rma_region;
1535 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1536 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1537 * not necessary on this hardware
1538 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1540 * FIXME: We should allow the user to force contiguous RMA
1541 * allocation in the cap_ppc_rma==1 case.
1543 if (cap_ppc_rma < 2) {
1544 return 0;
1547 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1548 if (fd < 0) {
1549 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1550 strerror(errno));
1551 return -1;
1554 size = MIN(ret.rma_size, 256ul << 20);
1556 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1557 if (rma == MAP_FAILED) {
1558 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1559 return -1;
1562 rma_region = g_new(MemoryRegion, 1);
1563 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1564 vmstate_register_ram_global(rma_region);
1565 memory_region_add_subregion(sysmem, 0, rma_region);
1567 return size;
1570 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1572 struct kvm_ppc_smmu_info info;
1573 long rampagesize, best_page_shift;
1574 int i;
1576 if (cap_ppc_rma >= 2) {
1577 return current_size;
1580 /* Find the largest hardware supported page size that's less than
1581 * or equal to the (logical) backing page size of guest RAM */
1582 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1583 rampagesize = getrampagesize();
1584 best_page_shift = 0;
1586 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1587 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1589 if (!sps->page_shift) {
1590 continue;
1593 if ((sps->page_shift > best_page_shift)
1594 && ((1UL << sps->page_shift) <= rampagesize)) {
1595 best_page_shift = sps->page_shift;
1599 return MIN(current_size,
1600 1ULL << (best_page_shift + hash_shift - 7));
1602 #endif
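/* Create an in-kernel TCE table for the given LIOBN and mmap it into QEMU.
 * Returns the mapped table (and its fd via *pfd), or NULL if the caller
 * should fall back to a userspace table. */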
1604 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1606 struct kvm_create_spapr_tce args = {
1607 .liobn = liobn,
1608 .window_size = window_size,
1610 long len;
1611 int fd;
1612 void *table;
1614 /* Must set fd to -1 so we don't try to munmap when called for
1615 * destroying the table, which the upper layers -will- do
1617 *pfd = -1;
1618 if (!cap_spapr_tce) {
1619 return NULL;
1622 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1623 if (fd < 0) {
1624 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1625 liobn);
1626 return NULL;
1629 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1630 /* FIXME: round this up to page size */
1632 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1633 if (table == MAP_FAILED) {
1634 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1635 liobn);
1636 close(fd);
1637 return NULL;
1640 *pfd = fd;
1641 return table;
1644 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1646 long len;
1648 if (fd < 0) {
1649 return -1;
1652 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1653 if ((munmap(table, len) < 0) ||
1654 (close(fd) < 0)) {
1655 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1656 strerror(errno));
1657 /* Leak the table */
1660 return 0;
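/* Ask the kernel to (re)allocate the guest hash page table. Returns the
 * log2 size of the kernel-allocated htab, 0 if QEMU must allocate the htab
 * itself, or a negative error code. */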
1663 int kvmppc_reset_htab(int shift_hint)
1665 uint32_t shift = shift_hint;
1667 if (!kvm_enabled()) {
1668 /* Full emulation, tell caller to allocate htab itself */
1669 return 0;
1671 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1672 int ret;
1673 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1674 if (ret == -ENOTTY) {
1675 /* At least some versions of PR KVM advertise the
1676 * capability, but don't implement the ioctl(). Oops.
1677 * Return 0 so that we allocate the htab in qemu, as is
1678 * correct for PR. */
1679 return 0;
1680 } else if (ret < 0) {
1681 return ret;
1683 return shift;
1686 /* We have a kernel that predates the htab reset calls. For PR
1687 * KVM, we need to allocate the htab ourselves; an HV KVM of
1688 * this era has already allocated a fixed 16MB hash table.
1689 * Kernels of this era have the GET_PVINFO capability
1690 * only on PR, so we use this hack to determine the right
1691 * answer */
1692 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1693 /* PR - tell caller to allocate htab */
1694 return 0;
1695 } else {
1696 /* HV - assume 16MB kernel allocated htab */
1697 return 24;
1701 static inline uint32_t mfpvr(void)
1703 uint32_t pvr;
1705 asm ("mfpvr %0"
1706 : "=r"(pvr));
1707 return pvr;
1710 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1712 if (on) {
1713 *word |= flags;
1714 } else {
1715 *word &= ~flags;
1719 static void kvmppc_host_cpu_initfn(Object *obj)
1721 assert(kvm_enabled());
1724 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1726 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1727 uint32_t vmx = kvmppc_get_vmx();
1728 uint32_t dfp = kvmppc_get_dfp();
1729 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1730 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1732 /* Now fix up the class with information we can query from the host */
1733 pcc->pvr = mfpvr();
1735 if (vmx != -1) {
1736 /* Only override when we know what the host supports */
1737 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1738 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1740 if (dfp != -1) {
1741 /* Only override when we know what the host supports */
1742 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1745 if (dcache_size != -1) {
1746 pcc->l1_dcache_size = dcache_size;
1749 if (icache_size != -1) {
1750 pcc->l1_icache_size = icache_size;
1754 bool kvmppc_has_cap_epr(void)
1756 return cap_epr;
1759 bool kvmppc_has_cap_htab_fd(void)
1761 return cap_htab_fd;
1764 static int kvm_ppc_register_host_cpu_type(void)
1766 TypeInfo type_info = {
1767 .name = TYPE_HOST_POWERPC_CPU,
1768 .instance_init = kvmppc_host_cpu_initfn,
1769 .class_init = kvmppc_host_cpu_class_init,
1771 uint32_t host_pvr = mfpvr();
1772 PowerPCCPUClass *pvr_pcc;
1774 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1775 if (pvr_pcc == NULL) {
1776 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1778 if (pvr_pcc == NULL) {
1779 return -1;
1781 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1782 type_register(&type_info);
1783 return 0;
1786 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1788 struct kvm_rtas_token_args args = {
1789 .token = token,
1792 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1793 return -ENOENT;
1796 strncpy(args.name, function, sizeof(args.name));
1798 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1801 int kvmppc_get_htab_fd(bool write)
1803 struct kvm_get_htab_fd s = {
1804 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1805 .start_index = 0,
1808 if (!cap_htab_fd) {
1809 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1810 return -1;
1813 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
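/* Stream hash table chunks from the kernel's HTAB fd into the migration
 * stream, stopping once max_ns nanoseconds have elapsed (negative means no
 * limit). Returns 1 when the fd is drained, 0 if we stopped early, or a
 * negative value on read error. */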
1816 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1818 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1819 uint8_t buf[bufsize];
1820 ssize_t rc;
1822 do {
1823 rc = read(fd, buf, bufsize);
1824 if (rc < 0) {
1825 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1826 strerror(errno));
1827 return rc;
1828 } else if (rc) {
1829 /* Kernel already returns data in BE format for the file */
1830 qemu_put_buffer(f, buf, rc);
1832 } while ((rc != 0)
1833 && ((max_ns < 0)
1834 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1836 return (rc == 0) ? 1 : 0;
1839 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1840 uint16_t n_valid, uint16_t n_invalid)
1842 struct kvm_get_htab_header *buf;
1843 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1844 ssize_t rc;
1846 buf = alloca(chunksize);
1847 /* This is KVM on ppc, so this is all big-endian */
1848 buf->index = index;
1849 buf->n_valid = n_valid;
1850 buf->n_invalid = n_invalid;
1852 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1854 rc = write(fd, buf, chunksize);
1855 if (rc < 0) {
1856 fprintf(stderr, "Error writing KVM hash table: %s\n",
1857 strerror(errno));
1858 return rc;
1860 if (rc != chunksize) {
1861 /* We should never get a short write on a single chunk */
1862 fprintf(stderr, "Short write, restoring KVM hash table\n");
1863 return -1;
1865 return 0;
1868 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1870 return true;
1873 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1875 return 1;
1878 int kvm_arch_on_sigbus(int code, void *addr)
1880 return 1;
1883 void kvm_arch_init_irq_routing(KVMState *s)
1887 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1889 return -EINVAL;
1892 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1894 return -EINVAL;
1897 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1899 return -EINVAL;
1902 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1904 return -EINVAL;
1907 void kvm_arch_remove_all_hw_breakpoints(void)
1911 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1915 struct kvm_get_htab_buf {
1916 struct kvm_get_htab_header header;
1918 * We require one extra entry for the read
1920 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
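/* Read one hash PTE group through a temporary HTAB fd. Returns a token (the
 * address of the hpte array inside a heap-allocated kvm_get_htab_buf) to be
 * released with kvmppc_hash64_free_pteg(), or 0 on failure. */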
1923 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1925 int htab_fd;
1926 struct kvm_get_htab_fd ghf;
1927 struct kvm_get_htab_buf *hpte_buf;
1929 ghf.flags = 0;
1930 ghf.start_index = pte_index;
1931 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1932 if (htab_fd < 0) {
1933 goto error_out;
1936 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1938 * Read the hpte group
1940 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1941 goto out_close;
1944 close(htab_fd);
1945 return (uint64_t)(uintptr_t) hpte_buf->hpte;
1947 out_close:
1948 g_free(hpte_buf);
1949 close(htab_fd);
1950 error_out:
1951 return 0;
1954 void kvmppc_hash64_free_pteg(uint64_t token)
1956 struct kvm_get_htab_buf *htab_buf;
1958 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1959 hpte);
1960 g_free(htab_buf);
1961 return;
1964 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1965 target_ulong pte0, target_ulong pte1)
1967 int htab_fd;
1968 struct kvm_get_htab_fd ghf;
1969 struct kvm_get_htab_buf hpte_buf;
1971 ghf.flags = 0;
1972 ghf.start_index = 0; /* Ignored */
1973 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1974 if (htab_fd < 0) {
1975 goto error_out;
1978 hpte_buf.header.n_valid = 1;
1979 hpte_buf.header.n_invalid = 0;
1980 hpte_buf.header.index = pte_index;
1981 hpte_buf.hpte[0] = pte0;
1982 hpte_buf.hpte[1] = pte1;
1984 * Write the hpte entry.
1985 * CAUTION: write() has the warn_unused_result attribute. Hence we
1986 * need to check the return value, even though we do nothing.
1988 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
1989 goto out_close;
1992 out_close:
1993 close(htab_fd);
1994 return;
1996 error_out:
1997 return;