target-ppc: Add PMC7/8 to 970 class
[qemu/ar7.git] / target-ppc / kvm.c
blob ef691feebce230af9b662a3b91c7dd9efd47fcc4
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
42 //#define DEBUG_KVM
44 #ifdef DEBUG_KVM
45 #define DPRINTF(fmt, ...) \
46 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
47 #else
48 #define DPRINTF(fmt, ...) \
49 do { } while (0)
50 #endif
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
55 KVM_CAP_LAST_INFO
58 static int cap_interrupt_unset = false;
59 static int cap_interrupt_level = false;
60 static int cap_segstate;
61 static int cap_booke_sregs;
62 static int cap_ppc_smt;
63 static int cap_ppc_rma;
64 static int cap_spapr_tce;
65 static int cap_spapr_multitce;
66 static int cap_hior;
67 static int cap_one_reg;
68 static int cap_epr;
69 static int cap_ppc_watchdog;
70 static int cap_papr;
71 static int cap_htab_fd;
72 static int cap_fixup_hcalls;
74 /* XXX We have a race condition where we actually have a level triggered
75 * interrupt, but the infrastructure can't expose that yet, so the guest
76 * takes but ignores it, goes to sleep and never gets notified that there's
77 * still an interrupt pending.
79 * As a quick workaround, let's just wake up again 20 ms after we injected
80 * an interrupt. That way we can ensure that we're always reinjecting
81 * interrupts in case the guest swallowed them.
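 *
 * (For reference: the timer is created in kvm_arch_init_vcpu() below and
 * re-armed in kvm_arch_pre_run() for get_ticks_per_sec() / 50 nanoseconds,
 * i.e. the 20 ms mentioned above.)
 */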
83 static QEMUTimer *idle_timer;
85 static void kvm_kick_cpu(void *opaque)
87 PowerPCCPU *cpu = opaque;
89 qemu_cpu_kick(CPU(cpu));
92 static int kvm_ppc_register_host_cpu_type(void);
94 int kvm_arch_init(KVMState *s)
96 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
97 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
98 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
99 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
100 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
101 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
102 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
103 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
104 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
105 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
106 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
107 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
108 /* Note: we don't set cap_papr here, because this capability is
109 * only activated after this by kvmppc_set_papr() */
110 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
111 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
113 if (!cap_interrupt_level) {
114 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
115 "VM to stall at times!\n");
118 kvm_ppc_register_host_cpu_type();
120 return 0;
123 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
125 CPUPPCState *cenv = &cpu->env;
126 CPUState *cs = CPU(cpu);
127 struct kvm_sregs sregs;
128 int ret;
130 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
131 /* What we're really trying to say is "if we're on BookE, we use
132 the native PVR for now". This is the only sane way to check
133 it though, so we potentially mislead users into thinking they can run
134 BookE guests on BookS. Let's hope nobody dares to try :) */
135 return 0;
136 } else {
137 if (!cap_segstate) {
138 fprintf(stderr, "kvm error: missing PVR setting capability\n");
139 return -ENOSYS;
143 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
144 if (ret) {
145 return ret;
148 sregs.pvr = cenv->spr[SPR_PVR];
149 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
152 /* Set up a shared TLB array with KVM */
153 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
155 CPUPPCState *env = &cpu->env;
156 CPUState *cs = CPU(cpu);
157 struct kvm_book3e_206_tlb_params params = {};
158 struct kvm_config_tlb cfg = {};
159 unsigned int entries = 0;
160 int ret, i;
162 if (!kvm_enabled() ||
163 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
164 return 0;
167 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
169 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
170 params.tlb_sizes[i] = booke206_tlb_size(env, i);
171 params.tlb_ways[i] = booke206_tlb_ways(env, i);
172 entries += params.tlb_sizes[i];
175 assert(entries == env->nb_tlb);
176 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
178 env->tlb_dirty = true;
180 cfg.array = (uintptr_t)env->tlb.tlbm;
181 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
182 cfg.params = (uintptr_t)&params;
183 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
185 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
186 if (ret < 0) {
187 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
188 __func__, strerror(-ret));
189 return ret;
192 env->kvm_sw_tlb = true;
193 return 0;
197 #if defined(TARGET_PPC64)
198 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
199 struct kvm_ppc_smmu_info *info)
201 CPUPPCState *env = &cpu->env;
202 CPUState *cs = CPU(cpu);
204 memset(info, 0, sizeof(*info));
206 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
207 * need to "guess" what the supported page sizes are.
209 * For that to work we make a few assumptions:
211 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
212 * KVM which only supports 4K and 16M pages, but supports them
213 * regardless of the backing store characteristics. We also don't
214 * support 1T segments.
216 * This is safe as, if HV KVM ever supports that capability or PR
217 * KVM grows support for more page/segment sizes, those versions
218 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
219 * will not hit this fallback.
221 * - Else we are running HV KVM. This means we only support page
222 * sizes that fit in the backing store. Additionally we only
223 * advertise 64K pages if the processor is ARCH 2.06 and we assume
224 * P7 encodings for the SLB and hash table. Here too, we assume
225 * support for any newer processor will mean a kernel that
226 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
227 * this fallback.
229 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
230 /* No flags */
231 info->flags = 0;
232 info->slb_size = 64;
234 /* Standard 4k base page size segment */
235 info->sps[0].page_shift = 12;
236 info->sps[0].slb_enc = 0;
237 info->sps[0].enc[0].page_shift = 12;
238 info->sps[0].enc[0].pte_enc = 0;
240 /* Standard 16M large page size segment */
241 info->sps[1].page_shift = 24;
242 info->sps[1].slb_enc = SLB_VSID_L;
243 info->sps[1].enc[0].page_shift = 24;
244 info->sps[1].enc[0].pte_enc = 0;
245 } else {
246 int i = 0;
248 /* HV KVM has backing store size restrictions */
249 info->flags = KVM_PPC_PAGE_SIZES_REAL;
251 if (env->mmu_model & POWERPC_MMU_1TSEG) {
252 info->flags |= KVM_PPC_1T_SEGMENTS;
255 if (env->mmu_model == POWERPC_MMU_2_06) {
256 info->slb_size = 32;
257 } else {
258 info->slb_size = 64;
261 /* Standard 4k base page size segment */
262 info->sps[i].page_shift = 12;
263 info->sps[i].slb_enc = 0;
264 info->sps[i].enc[0].page_shift = 12;
265 info->sps[i].enc[0].pte_enc = 0;
266 i++;
268 /* 64K on MMU 2.06 */
269 if (env->mmu_model == POWERPC_MMU_2_06) {
270 info->sps[i].page_shift = 16;
271 info->sps[i].slb_enc = 0x110;
272 info->sps[i].enc[0].page_shift = 16;
273 info->sps[i].enc[0].pte_enc = 1;
274 i++;
277 /* Standard 16M large page size segment */
278 info->sps[i].page_shift = 24;
279 info->sps[i].slb_enc = SLB_VSID_L;
280 info->sps[i].enc[0].page_shift = 24;
281 info->sps[i].enc[0].pte_enc = 0;
285 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
287 CPUState *cs = CPU(cpu);
288 int ret;
290 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
291 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
292 if (ret == 0) {
293 return;
297 kvm_get_fallback_smmu_info(cpu, info);
300 static long getrampagesize(void)
302 struct statfs fs;
303 int ret;
305 if (!mem_path) {
306 /* guest RAM is backed by normal anonymous pages */
307 return getpagesize();
310 do {
311 ret = statfs(mem_path, &fs);
312 } while (ret != 0 && errno == EINTR);
314 if (ret != 0) {
315 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
316 strerror(errno));
317 exit(1);
320 #define HUGETLBFS_MAGIC 0x958458f6
322 if (fs.f_type != HUGETLBFS_MAGIC) {
323 /* Explicit mempath, but it's ordinary pages */
324 return getpagesize();
327 /* It's hugetlbfs, so return the huge page size */
328 return fs.f_bsize;
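/* (For example, a hugetlbfs mount backed by 16 MiB hugepages, the usual size
 * on ppc64 hash-MMU hosts, reports f_bsize == 0x1000000, so guest RAM would
 * be treated as backed by 16 MiB pages.) */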
331 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
333 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
334 return true;
337 return (1ul << shift) <= rampgsize;
340 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
342 static struct kvm_ppc_smmu_info smmu_info;
343 static bool has_smmu_info;
344 CPUPPCState *env = &cpu->env;
345 long rampagesize;
346 int iq, ik, jq, jk;
348 /* We only handle page sizes for 64-bit server guests for now */
349 if (!(env->mmu_model & POWERPC_MMU_64)) {
350 return;
353 /* Collect MMU info from kernel if not already */
354 if (!has_smmu_info) {
355 kvm_get_smmu_info(cpu, &smmu_info);
356 has_smmu_info = true;
359 rampagesize = getrampagesize();
361 /* Convert to QEMU form */
362 memset(&env->sps, 0, sizeof(env->sps));
365 * XXX This loop should be an entry wide AND of the capabilities that
366 * the selected CPU has with the capabilities that KVM supports.
368 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
369 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
370 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
372 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
373 ksps->page_shift)) {
374 continue;
376 qsps->page_shift = ksps->page_shift;
377 qsps->slb_enc = ksps->slb_enc;
378 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
379 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
380 ksps->enc[jk].page_shift)) {
381 continue;
383 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
384 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
385 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
386 break;
389 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
390 break;
393 env->slb_nr = smmu_info.slb_size;
394 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
395 env->mmu_model &= ~POWERPC_MMU_1TSEG;
398 #else /* defined (TARGET_PPC64) */
400 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
404 #endif /* !defined (TARGET_PPC64) */
406 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
408 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
411 int kvm_arch_init_vcpu(CPUState *cs)
413 PowerPCCPU *cpu = POWERPC_CPU(cs);
414 CPUPPCState *cenv = &cpu->env;
415 int ret;
417 /* Gather server mmu info from KVM and update the CPU state */
418 kvm_fixup_page_sizes(cpu);
420 /* Synchronize sregs with kvm */
421 ret = kvm_arch_sync_sregs(cpu);
422 if (ret) {
423 return ret;
426 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
428 /* Some targets support access to KVM's guest TLB. */
429 switch (cenv->mmu_model) {
430 case POWERPC_MMU_BOOKE206:
431 ret = kvm_booke206_tlb_init(cpu);
432 break;
433 default:
434 break;
437 return ret;
440 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
442 CPUPPCState *env = &cpu->env;
443 CPUState *cs = CPU(cpu);
444 struct kvm_dirty_tlb dirty_tlb;
445 unsigned char *bitmap;
446 int ret;
448 if (!env->kvm_sw_tlb) {
449 return;
452 bitmap = g_malloc((env->nb_tlb + 7) / 8);
453 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
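/* (Setting every bit marks every shared TLB entry dirty, so KVM picks up the
 * whole array: e.g. with a 512-entry TLB0 plus a 64-entry TLB1, nb_tlb is 576
 * and the bitmap is (576 + 7) / 8 == 72 bytes.) */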
455 dirty_tlb.bitmap = (uintptr_t)bitmap;
456 dirty_tlb.num_dirty = env->nb_tlb;
458 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
459 if (ret) {
460 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
461 __func__, strerror(-ret));
464 g_free(bitmap);
467 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
469 PowerPCCPU *cpu = POWERPC_CPU(cs);
470 CPUPPCState *env = &cpu->env;
471 union {
472 uint32_t u32;
473 uint64_t u64;
474 } val;
475 struct kvm_one_reg reg = {
476 .id = id,
477 .addr = (uintptr_t) &val,
479 int ret;
481 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
482 if (ret != 0) {
483 trace_kvm_failed_spr_get(spr, strerror(errno));
484 } else {
485 switch (id & KVM_REG_SIZE_MASK) {
486 case KVM_REG_SIZE_U32:
487 env->spr[spr] = val.u32;
488 break;
490 case KVM_REG_SIZE_U64:
491 env->spr[spr] = val.u64;
492 break;
494 default:
495 /* Don't handle this size yet */
496 abort();
501 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
503 PowerPCCPU *cpu = POWERPC_CPU(cs);
504 CPUPPCState *env = &cpu->env;
505 union {
506 uint32_t u32;
507 uint64_t u64;
508 } val;
509 struct kvm_one_reg reg = {
510 .id = id,
511 .addr = (uintptr_t) &val,
513 int ret;
515 switch (id & KVM_REG_SIZE_MASK) {
516 case KVM_REG_SIZE_U32:
517 val.u32 = env->spr[spr];
518 break;
520 case KVM_REG_SIZE_U64:
521 val.u64 = env->spr[spr];
522 break;
524 default:
525 /* Don't handle this size yet */
526 abort();
529 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
530 if (ret != 0) {
531 trace_kvm_failed_spr_set(spr, strerror(errno));
535 static int kvm_put_fp(CPUState *cs)
537 PowerPCCPU *cpu = POWERPC_CPU(cs);
538 CPUPPCState *env = &cpu->env;
539 struct kvm_one_reg reg;
540 int i;
541 int ret;
543 if (env->insns_flags & PPC_FLOAT) {
544 uint64_t fpscr = env->fpscr;
545 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
547 reg.id = KVM_REG_PPC_FPSCR;
548 reg.addr = (uintptr_t)&fpscr;
549 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
550 if (ret < 0) {
551 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
552 return ret;
555 for (i = 0; i < 32; i++) {
556 uint64_t vsr[2];
558 vsr[0] = float64_val(env->fpr[i]);
559 vsr[1] = env->vsr[i];
560 reg.addr = (uintptr_t) &vsr;
561 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
563 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
564 if (ret < 0) {
565 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
566 i, strerror(errno));
567 return ret;
572 if (env->insns_flags & PPC_ALTIVEC) {
573 reg.id = KVM_REG_PPC_VSCR;
574 reg.addr = (uintptr_t)&env->vscr;
575 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
576 if (ret < 0) {
577 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
578 return ret;
581 for (i = 0; i < 32; i++) {
582 reg.id = KVM_REG_PPC_VR(i);
583 reg.addr = (uintptr_t)&env->avr[i];
584 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
585 if (ret < 0) {
586 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
587 return ret;
592 return 0;
595 static int kvm_get_fp(CPUState *cs)
597 PowerPCCPU *cpu = POWERPC_CPU(cs);
598 CPUPPCState *env = &cpu->env;
599 struct kvm_one_reg reg;
600 int i;
601 int ret;
603 if (env->insns_flags & PPC_FLOAT) {
604 uint64_t fpscr;
605 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
607 reg.id = KVM_REG_PPC_FPSCR;
608 reg.addr = (uintptr_t)&fpscr;
609 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
610 if (ret < 0) {
611 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
612 return ret;
613 } else {
614 env->fpscr = fpscr;
617 for (i = 0; i < 32; i++) {
618 uint64_t vsr[2];
620 reg.addr = (uintptr_t) &vsr;
621 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
623 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
624 if (ret < 0) {
625 DPRINTF("Unable to get %s%d from KVM: %s\n",
626 vsx ? "VSR" : "FPR", i, strerror(errno));
627 return ret;
628 } else {
629 env->fpr[i] = vsr[0];
630 if (vsx) {
631 env->vsr[i] = vsr[1];
637 if (env->insns_flags & PPC_ALTIVEC) {
638 reg.id = KVM_REG_PPC_VSCR;
639 reg.addr = (uintptr_t)&env->vscr;
640 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
641 if (ret < 0) {
642 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
643 return ret;
646 for (i = 0; i < 32; i++) {
647 reg.id = KVM_REG_PPC_VR(i);
648 reg.addr = (uintptr_t)&env->avr[i];
649 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
650 if (ret < 0) {
651 DPRINTF("Unable to get VR%d from KVM: %s\n",
652 i, strerror(errno));
653 return ret;
658 return 0;
661 #if defined(TARGET_PPC64)
662 static int kvm_get_vpa(CPUState *cs)
664 PowerPCCPU *cpu = POWERPC_CPU(cs);
665 CPUPPCState *env = &cpu->env;
666 struct kvm_one_reg reg;
667 int ret;
669 reg.id = KVM_REG_PPC_VPA_ADDR;
670 reg.addr = (uintptr_t)&env->vpa_addr;
671 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
672 if (ret < 0) {
673 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
674 return ret;
677 assert((uintptr_t)&env->slb_shadow_size
678 == ((uintptr_t)&env->slb_shadow_addr + 8));
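/* (KVM_REG_PPC_VPA_SLB and KVM_REG_PPC_VPA_DTL each transfer an (address,
 * length) pair in a single access, hence the asserts that the two env fields
 * sit next to each other in memory.) */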
679 reg.id = KVM_REG_PPC_VPA_SLB;
680 reg.addr = (uintptr_t)&env->slb_shadow_addr;
681 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
682 if (ret < 0) {
683 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
684 strerror(errno));
685 return ret;
688 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
689 reg.id = KVM_REG_PPC_VPA_DTL;
690 reg.addr = (uintptr_t)&env->dtl_addr;
691 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
692 if (ret < 0) {
693 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
694 strerror(errno));
695 return ret;
698 return 0;
701 static int kvm_put_vpa(CPUState *cs)
703 PowerPCCPU *cpu = POWERPC_CPU(cs);
704 CPUPPCState *env = &cpu->env;
705 struct kvm_one_reg reg;
706 int ret;
708 /* SLB shadow or DTL can't be registered unless a master VPA is
709 * registered. That means when restoring state, if a VPA *is*
710 * registered, we need to set that up first. If not, we need to
711 * deregister the others before deregistering the master VPA */
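/* (Concretely, the code below registers in the order VPA -> SLB shadow -> DTL
 * when env->vpa_addr is set, and only writes KVM_REG_PPC_VPA_ADDR last, with a
 * zero value, when the VPA is being deregistered, so the dependent areas are
 * always torn down first.) */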
712 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
714 if (env->vpa_addr) {
715 reg.id = KVM_REG_PPC_VPA_ADDR;
716 reg.addr = (uintptr_t)&env->vpa_addr;
717 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
718 if (ret < 0) {
719 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
720 return ret;
724 assert((uintptr_t)&env->slb_shadow_size
725 == ((uintptr_t)&env->slb_shadow_addr + 8));
726 reg.id = KVM_REG_PPC_VPA_SLB;
727 reg.addr = (uintptr_t)&env->slb_shadow_addr;
728 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
729 if (ret < 0) {
730 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
731 return ret;
734 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
735 reg.id = KVM_REG_PPC_VPA_DTL;
736 reg.addr = (uintptr_t)&env->dtl_addr;
737 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
738 if (ret < 0) {
739 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
740 strerror(errno));
741 return ret;
744 if (!env->vpa_addr) {
745 reg.id = KVM_REG_PPC_VPA_ADDR;
746 reg.addr = (uintptr_t)&env->vpa_addr;
747 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
748 if (ret < 0) {
749 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
750 return ret;
754 return 0;
756 #endif /* TARGET_PPC64 */
758 int kvm_arch_put_registers(CPUState *cs, int level)
760 PowerPCCPU *cpu = POWERPC_CPU(cs);
761 CPUPPCState *env = &cpu->env;
762 struct kvm_regs regs;
763 int ret;
764 int i;
766 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
767 if (ret < 0) {
768 return ret;
771 regs.ctr = env->ctr;
772 regs.lr = env->lr;
773 regs.xer = cpu_read_xer(env);
774 regs.msr = env->msr;
775 regs.pc = env->nip;
777 regs.srr0 = env->spr[SPR_SRR0];
778 regs.srr1 = env->spr[SPR_SRR1];
780 regs.sprg0 = env->spr[SPR_SPRG0];
781 regs.sprg1 = env->spr[SPR_SPRG1];
782 regs.sprg2 = env->spr[SPR_SPRG2];
783 regs.sprg3 = env->spr[SPR_SPRG3];
784 regs.sprg4 = env->spr[SPR_SPRG4];
785 regs.sprg5 = env->spr[SPR_SPRG5];
786 regs.sprg6 = env->spr[SPR_SPRG6];
787 regs.sprg7 = env->spr[SPR_SPRG7];
789 regs.pid = env->spr[SPR_BOOKE_PID];
791 for (i = 0; i < 32; i++)
792 regs.gpr[i] = env->gpr[i];
794 regs.cr = 0;
795 for (i = 0; i < 8; i++) {
796 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
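/* (crf[0] is the architecturally most-significant CR field, CR0, so it lands
 * in bits 31..28 of the packed value; crf[7], CR7, ends up in bits 3..0.) */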
799 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
800 if (ret < 0)
801 return ret;
803 kvm_put_fp(cs);
805 if (env->tlb_dirty) {
806 kvm_sw_tlb_put(cpu);
807 env->tlb_dirty = false;
810 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
811 struct kvm_sregs sregs;
813 sregs.pvr = env->spr[SPR_PVR];
815 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
817 /* Sync SLB */
818 #ifdef TARGET_PPC64
819 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
820 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
821 if (env->slb[i].esid & SLB_ESID_V) {
822 sregs.u.s.ppc64.slb[i].slbe |= i;
824 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
826 #endif
828 /* Sync SRs */
829 for (i = 0; i < 16; i++) {
830 sregs.u.s.ppc32.sr[i] = env->sr[i];
833 /* Sync BATs */
834 for (i = 0; i < 8; i++) {
835 /* Beware. We have to swap upper and lower bits here */
836 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
837 | env->DBAT[1][i];
838 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
839 | env->IBAT[1][i];
842 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
843 if (ret) {
844 return ret;
848 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
849 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
852 if (cap_one_reg) {
853 int i;
855 /* We deliberately ignore errors here; for kernels which have
856 * the ONE_REG calls but don't support the specific
857 * registers, there's a reasonable chance things will still
858 * work, at least until we try to migrate. */
859 for (i = 0; i < 1024; i++) {
860 uint64_t id = env->spr_cb[i].one_reg_id;
862 if (id != 0) {
863 kvm_put_one_spr(cs, id, i);
867 #ifdef TARGET_PPC64
868 if (cap_papr) {
869 if (kvm_put_vpa(cs) < 0) {
870 DPRINTF("Warning: Unable to set VPA information to KVM\n");
874 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
875 #endif /* TARGET_PPC64 */
878 return ret;
881 int kvm_arch_get_registers(CPUState *cs)
883 PowerPCCPU *cpu = POWERPC_CPU(cs);
884 CPUPPCState *env = &cpu->env;
885 struct kvm_regs regs;
886 struct kvm_sregs sregs;
887 uint32_t cr;
888 int i, ret;
890 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
891 if (ret < 0)
892 return ret;
894 cr = regs.cr;
895 for (i = 7; i >= 0; i--) {
896 env->crf[i] = cr & 15;
897 cr >>= 4;
900 env->ctr = regs.ctr;
901 env->lr = regs.lr;
902 cpu_write_xer(env, regs.xer);
903 env->msr = regs.msr;
904 env->nip = regs.pc;
906 env->spr[SPR_SRR0] = regs.srr0;
907 env->spr[SPR_SRR1] = regs.srr1;
909 env->spr[SPR_SPRG0] = regs.sprg0;
910 env->spr[SPR_SPRG1] = regs.sprg1;
911 env->spr[SPR_SPRG2] = regs.sprg2;
912 env->spr[SPR_SPRG3] = regs.sprg3;
913 env->spr[SPR_SPRG4] = regs.sprg4;
914 env->spr[SPR_SPRG5] = regs.sprg5;
915 env->spr[SPR_SPRG6] = regs.sprg6;
916 env->spr[SPR_SPRG7] = regs.sprg7;
918 env->spr[SPR_BOOKE_PID] = regs.pid;
920 for (i = 0; i < 32; i++)
921 env->gpr[i] = regs.gpr[i];
923 kvm_get_fp(cs);
925 if (cap_booke_sregs) {
926 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
927 if (ret < 0) {
928 return ret;
931 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
932 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
933 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
934 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
935 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
936 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
937 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
938 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
939 env->spr[SPR_DECR] = sregs.u.e.dec;
940 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
941 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
942 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
945 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
946 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
947 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
948 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
949 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
950 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
953 if (sregs.u.e.features & KVM_SREGS_E_64) {
954 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
957 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
958 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
961 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
962 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
963 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
964 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
965 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
966 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
967 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
968 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
969 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
970 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
971 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
972 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
973 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
974 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
975 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
976 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
977 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
979 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
980 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
981 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
982 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
985 if (sregs.u.e.features & KVM_SREGS_E_PM) {
986 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
989 if (sregs.u.e.features & KVM_SREGS_E_PC) {
990 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
991 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
995 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
996 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
997 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
998 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
999 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1000 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1001 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1002 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1003 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1004 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1005 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1008 if (sregs.u.e.features & KVM_SREGS_EXP) {
1009 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1012 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1013 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1014 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1017 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1018 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1019 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1020 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1022 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1023 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1024 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1029 if (cap_segstate) {
1030 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1031 if (ret < 0) {
1032 return ret;
1035 if (!env->external_htab) {
1036 ppc_store_sdr1(env, sregs.u.s.sdr1);
1039 /* Sync SLB */
1040 #ifdef TARGET_PPC64
1042 * The packed SLB array we get from KVM_GET_SREGS only contains
1043 * information about valid entries. So we flush our internal
1044 * copy to get rid of stale ones, then put all valid SLB entries
1045 * back in.
1047 memset(env->slb, 0, sizeof(env->slb));
1048 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1049 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1050 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1052 * Only restore valid entries
1054 if (rb & SLB_ESID_V) {
1055 ppc_store_slb(env, rb, rs);
1058 #endif
1060 /* Sync SRs */
1061 for (i = 0; i < 16; i++) {
1062 env->sr[i] = sregs.u.s.ppc32.sr[i];
1065 /* Sync BATs */
1066 for (i = 0; i < 8; i++) {
1067 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1068 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1069 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1070 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1074 if (cap_hior) {
1075 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1078 if (cap_one_reg) {
1079 int i;
1081 /* We deliberately ignore errors here; for kernels which have
1082 * the ONE_REG calls but don't support the specific
1083 * registers, there's a reasonable chance things will still
1084 * work, at least until we try to migrate. */
1085 for (i = 0; i < 1024; i++) {
1086 uint64_t id = env->spr_cb[i].one_reg_id;
1088 if (id != 0) {
1089 kvm_get_one_spr(cs, id, i);
1093 #ifdef TARGET_PPC64
1094 if (cap_papr) {
1095 if (kvm_get_vpa(cs) < 0) {
1096 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1100 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1101 #endif
1104 return 0;
1107 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1109 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1111 if (irq != PPC_INTERRUPT_EXT) {
1112 return 0;
1115 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1116 return 0;
1119 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1121 return 0;
1124 #if defined(TARGET_PPCEMB)
1125 #define PPC_INPUT_INT PPC40x_INPUT_INT
1126 #elif defined(TARGET_PPC64)
1127 #define PPC_INPUT_INT PPC970_INPUT_INT
1128 #else
1129 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1130 #endif
1132 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1134 PowerPCCPU *cpu = POWERPC_CPU(cs);
1135 CPUPPCState *env = &cpu->env;
1136 int r;
1137 unsigned irq;
1139 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1140 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1141 if (!cap_interrupt_level &&
1142 run->ready_for_interrupt_injection &&
1143 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1144 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1146 /* For now KVM disregards the 'irq' argument. However, in the
1147 * future KVM could cache it in-kernel to avoid a heavyweight exit
1148 * when reading the UIC.
1150 irq = KVM_INTERRUPT_SET;
1152 DPRINTF("injected interrupt %d\n", irq);
1153 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1154 if (r < 0) {
1155 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1158 /* Always wake up soon in case the interrupt was level based */
1159 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1160 (get_ticks_per_sec() / 50));
1163 /* We don't know if there are more interrupts pending after this. However,
1164 * the guest will return to userspace in the course of handling this one
1165 * anyway, so we will get a chance to deliver the rest. */
1168 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1172 int kvm_arch_process_async_events(CPUState *cs)
1174 return cs->halted;
1177 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1179 CPUState *cs = CPU(cpu);
1180 CPUPPCState *env = &cpu->env;
1182 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1183 cs->halted = 1;
1184 cs->exception_index = EXCP_HLT;
1187 return 0;
1190 /* map dcr access to existing qemu dcr emulation */
1191 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1193 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1194 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1196 return 0;
1199 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1201 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1202 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1204 return 0;
1207 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1209 PowerPCCPU *cpu = POWERPC_CPU(cs);
1210 CPUPPCState *env = &cpu->env;
1211 int ret;
1213 switch (run->exit_reason) {
1214 case KVM_EXIT_DCR:
1215 if (run->dcr.is_write) {
1216 DPRINTF("handle dcr write\n");
1217 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1218 } else {
1219 DPRINTF("handle dcr read\n");
1220 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1222 break;
1223 case KVM_EXIT_HLT:
1224 DPRINTF("handle halt\n");
1225 ret = kvmppc_handle_halt(cpu);
1226 break;
1227 #if defined(TARGET_PPC64)
1228 case KVM_EXIT_PAPR_HCALL:
1229 DPRINTF("handle PAPR hypercall\n");
1230 run->papr_hcall.ret = spapr_hypercall(cpu,
1231 run->papr_hcall.nr,
1232 run->papr_hcall.args);
1233 ret = 0;
1234 break;
1235 #endif
1236 case KVM_EXIT_EPR:
1237 DPRINTF("handle epr\n");
1238 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1239 ret = 0;
1240 break;
1241 case KVM_EXIT_WATCHDOG:
1242 DPRINTF("handle watchdog expiry\n");
1243 watchdog_perform_action();
1244 ret = 0;
1245 break;
1247 default:
1248 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1249 ret = -1;
1250 break;
1253 return ret;
1256 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1258 CPUState *cs = CPU(cpu);
1259 uint32_t bits = tsr_bits;
1260 struct kvm_one_reg reg = {
1261 .id = KVM_REG_PPC_OR_TSR,
1262 .addr = (uintptr_t) &bits,
1265 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1268 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1271 CPUState *cs = CPU(cpu);
1272 uint32_t bits = tsr_bits;
1273 struct kvm_one_reg reg = {
1274 .id = KVM_REG_PPC_CLEAR_TSR,
1275 .addr = (uintptr_t) &bits,
1278 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1281 int kvmppc_set_tcr(PowerPCCPU *cpu)
1283 CPUState *cs = CPU(cpu);
1284 CPUPPCState *env = &cpu->env;
1285 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1287 struct kvm_one_reg reg = {
1288 .id = KVM_REG_PPC_TCR,
1289 .addr = (uintptr_t) &tcr,
1292 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1295 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1297 CPUState *cs = CPU(cpu);
1298 int ret;
1300 if (!kvm_enabled()) {
1301 return -1;
1304 if (!cap_ppc_watchdog) {
1305 printf("warning: KVM does not support watchdog");
1306 return -1;
1309 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1310 if (ret < 0) {
1311 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1312 __func__, strerror(-ret));
1313 return ret;
1316 return ret;
1319 static int read_cpuinfo(const char *field, char *value, int len)
1321 FILE *f;
1322 int ret = -1;
1323 int field_len = strlen(field);
1324 char line[512];
1326 f = fopen("/proc/cpuinfo", "r");
1327 if (!f) {
1328 return -1;
1331 do {
1332 if (!fgets(line, sizeof(line), f)) {
1333 break;
1335 if (!strncmp(line, field, field_len)) {
1336 pstrcpy(value, len, line);
1337 ret = 0;
1338 break;
1340 } while (*line);
1342 fclose(f);
1344 return ret;
1347 uint32_t kvmppc_get_tbfreq(void)
1349 char line[512];
1350 char *ns;
1351 uint32_t retval = get_ticks_per_sec();
1353 if (read_cpuinfo("timebase", line, sizeof(line))) {
1354 return retval;
1357 if (!(ns = strchr(line, ':'))) {
1358 return retval;
1361 ns++;
1363 retval = atoi(ns);
1364 return retval;
1367 /* Try to find a device tree node for a CPU with clock-frequency property */
1368 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1370 struct dirent *dirp;
1371 DIR *dp;
1373 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1374 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1375 return -1;
1378 buf[0] = '\0';
1379 while ((dirp = readdir(dp)) != NULL) {
1380 FILE *f;
1381 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1382 dirp->d_name);
1383 f = fopen(buf, "r");
1384 if (f) {
1385 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1386 fclose(f);
1387 break;
1389 buf[0] = '\0';
1391 closedir(dp);
1392 if (buf[0] == '\0') {
1393 printf("Unknown host!\n");
1394 return -1;
1397 return 0;
1400 /* Read a CPU node property from the host device tree that's a single
1401 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1402 * (can't find or open the property, or doesn't understand the
1403 * format) */
1404 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1406 char buf[PATH_MAX];
1407 union {
1408 uint32_t v32;
1409 uint64_t v64;
1410 } u;
1411 FILE *f;
1412 int len;
1414 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1415 return -1;
1418 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1419 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1421 f = fopen(buf, "rb");
1422 if (!f) {
1423 return -1;
1426 len = fread(&u, 1, sizeof(u), f);
1427 fclose(f);
1428 switch (len) {
1429 case 4:
1430 /* property is a 32-bit quantity */
1431 return be32_to_cpu(u.v32);
1432 case 8:
1433 return be64_to_cpu(u.v64);
1436 return 0;
1439 uint64_t kvmppc_get_clockfreq(void)
1441 return kvmppc_read_int_cpu_dt("clock-frequency");
1444 uint32_t kvmppc_get_vmx(void)
1446 return kvmppc_read_int_cpu_dt("ibm,vmx");
1449 uint32_t kvmppc_get_dfp(void)
1451 return kvmppc_read_int_cpu_dt("ibm,dfp");
1454 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1456 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1457 CPUState *cs = CPU(cpu);
1459 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1460 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1461 return 0;
1464 return 1;
1467 int kvmppc_get_hasidle(CPUPPCState *env)
1469 struct kvm_ppc_pvinfo pvinfo;
1471 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1472 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1473 return 1;
1476 return 0;
1479 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1481 uint32_t *hc = (uint32_t*)buf;
1482 struct kvm_ppc_pvinfo pvinfo;
1484 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1485 memcpy(buf, pvinfo.hcall, buf_len);
1486 return 0;
1490 * Fallback to always fail hypercalls:
1492 * li r3, -1
1493 * nop
1494 * nop
1495 * nop
1498 hc[0] = 0x3860ffff;
1499 hc[1] = 0x60000000;
1500 hc[2] = 0x60000000;
1501 hc[3] = 0x60000000;
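/* (Encodings: 0x3860ffff is addi r3, 0, -1, i.e. "li r3, -1"; 0x60000000 is
 * ori 0, 0, 0, the canonical PowerPC nop.) */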
1503 return 0;
1506 void kvmppc_set_papr(PowerPCCPU *cpu)
1508 CPUState *cs = CPU(cpu);
1509 int ret;
1511 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1512 if (ret) {
1513 cpu_abort(cs, "This KVM version does not support PAPR\n");
1516 /* Update the capability flag so we sync the right information
1517 * with kvm */
1518 cap_papr = 1;
1521 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1523 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1526 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1528 CPUState *cs = CPU(cpu);
1529 int ret;
1531 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1532 if (ret && mpic_proxy) {
1533 cpu_abort(cs, "This KVM version does not support EPR\n");
1537 int kvmppc_smt_threads(void)
1539 return cap_ppc_smt ? cap_ppc_smt : 1;
1542 #ifdef TARGET_PPC64
1543 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1545 void *rma;
1546 off_t size;
1547 int fd;
1548 struct kvm_allocate_rma ret;
1549 MemoryRegion *rma_region;
1551 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1552 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1553 * not necessary on this hardware
1554 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1556 * FIXME: We should allow the user to force contiguous RMA
1557 * allocation in the cap_ppc_rma==1 case.
1559 if (cap_ppc_rma < 2) {
1560 return 0;
1563 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1564 if (fd < 0) {
1565 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1566 strerror(errno));
1567 return -1;
1570 size = MIN(ret.rma_size, 256ul << 20);
1572 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1573 if (rma == MAP_FAILED) {
1574 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1575 return -1;
1578 rma_region = g_new(MemoryRegion, 1);
1579 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1580 vmstate_register_ram_global(rma_region);
1581 memory_region_add_subregion(sysmem, 0, rma_region);
1583 return size;
1586 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1588 struct kvm_ppc_smmu_info info;
1589 long rampagesize, best_page_shift;
1590 int i;
1592 if (cap_ppc_rma >= 2) {
1593 return current_size;
1596 /* Find the largest hardware supported page size that's less than
1597 * or equal to the (logical) backing page size of guest RAM */
1598 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1599 rampagesize = getrampagesize();
1600 best_page_shift = 0;
1602 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1603 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1605 if (!sps->page_shift) {
1606 continue;
1609 if ((sps->page_shift > best_page_shift)
1610 && ((1UL << sps->page_shift) <= rampagesize)) {
1611 best_page_shift = sps->page_shift;
1615 return MIN(current_size,
1616 1ULL << (best_page_shift + hash_shift - 7));
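/* (The 2^(page_shift + hash_shift - 7) bound equals the page size times the
 * number of 128-byte PTEGs in a 2^hash_shift-byte hash table: e.g. with
 * 64 KiB pages and hash_shift == 24, a 16 MiB HPT, the RMA is capped at
 * 1ULL << 33 == 8 GiB.) */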
1618 #endif
1620 bool kvmppc_spapr_use_multitce(void)
1622 return cap_spapr_multitce;
1625 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1627 struct kvm_create_spapr_tce args = {
1628 .liobn = liobn,
1629 .window_size = window_size,
1631 long len;
1632 int fd;
1633 void *table;
1635 /* Must set fd to -1 so we don't try to munmap when called for
1636 * destroying the table, which the upper layers -will- do
1638 *pfd = -1;
1639 if (!cap_spapr_tce) {
1640 return NULL;
1643 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1644 if (fd < 0) {
1645 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1646 liobn);
1647 return NULL;
1650 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1651 /* FIXME: round this up to page size */
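/* (With SPAPR_TCE_PAGE_SIZE == 4 KiB and 8 bytes per TCE, a 256 MiB DMA
 * window, for example, needs 65536 entries, i.e. a 512 KiB table.) */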
1653 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1654 if (table == MAP_FAILED) {
1655 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1656 liobn);
1657 close(fd);
1658 return NULL;
1661 *pfd = fd;
1662 return table;
1665 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
1667 long len;
1669 if (fd < 0) {
1670 return -1;
1673 len = nb_table * sizeof(uint64_t);
1674 if ((munmap(table, len) < 0) ||
1675 (close(fd) < 0)) {
1676 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1677 strerror(errno));
1678 /* Leak the table */
1681 return 0;
1684 int kvmppc_reset_htab(int shift_hint)
1686 uint32_t shift = shift_hint;
1688 if (!kvm_enabled()) {
1689 /* Full emulation, tell caller to allocate htab itself */
1690 return 0;
1692 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1693 int ret;
1694 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1695 if (ret == -ENOTTY) {
1696 /* At least some versions of PR KVM advertise the
1697 * capability, but don't implement the ioctl(). Oops.
1698 * Return 0 so that we allocate the htab in qemu, as is
1699 * correct for PR. */
1700 return 0;
1701 } else if (ret < 0) {
1702 return ret;
1704 return shift;
1707 /* We have a kernel that predates the htab reset calls. For PR
1708 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1709 * this era, it has allocated a 16MB fixed size hash table
1710 * already. Kernels of this era have the GET_PVINFO capability
1711 * only on PR, so we use this hack to determine the right
1712 * answer */
1713 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1714 /* PR - tell caller to allocate htab */
1715 return 0;
1716 } else {
1717 /* HV - assume 16MB kernel allocated htab */
1718 return 24;
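/* (Returning 24 reports a 1 << 24 == 16 MiB hash table, matching the fixed
 * size those older HV kernels allocate.) */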
1722 static inline uint32_t mfpvr(void)
1724 uint32_t pvr;
1726 asm ("mfpvr %0"
1727 : "=r"(pvr));
1728 return pvr;
1731 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1733 if (on) {
1734 *word |= flags;
1735 } else {
1736 *word &= ~flags;
1740 static void kvmppc_host_cpu_initfn(Object *obj)
1742 assert(kvm_enabled());
1745 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1747 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1748 uint32_t vmx = kvmppc_get_vmx();
1749 uint32_t dfp = kvmppc_get_dfp();
1750 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1751 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1753 /* Now fix up the class with information we can query from the host */
1754 pcc->pvr = mfpvr();
1756 if (vmx != -1) {
1757 /* Only override when we know what the host supports */
1758 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1759 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1761 if (dfp != -1) {
1762 /* Only override when we know what the host supports */
1763 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1766 if (dcache_size != -1) {
1767 pcc->l1_dcache_size = dcache_size;
1770 if (icache_size != -1) {
1771 pcc->l1_icache_size = icache_size;
1775 bool kvmppc_has_cap_epr(void)
1777 return cap_epr;
1780 bool kvmppc_has_cap_htab_fd(void)
1782 return cap_htab_fd;
1785 bool kvmppc_has_cap_fixup_hcalls(void)
1787 return cap_fixup_hcalls;
1790 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
1792 ObjectClass *oc = OBJECT_CLASS(pcc);
1794 while (oc && !object_class_is_abstract(oc)) {
1795 oc = object_class_get_parent(oc);
1797 assert(oc);
1799 return POWERPC_CPU_CLASS(oc);
1802 static int kvm_ppc_register_host_cpu_type(void)
1804 TypeInfo type_info = {
1805 .name = TYPE_HOST_POWERPC_CPU,
1806 .instance_init = kvmppc_host_cpu_initfn,
1807 .class_init = kvmppc_host_cpu_class_init,
1809 uint32_t host_pvr = mfpvr();
1810 PowerPCCPUClass *pvr_pcc;
1811 DeviceClass *dc;
1813 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1814 if (pvr_pcc == NULL) {
1815 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1817 if (pvr_pcc == NULL) {
1818 return -1;
1820 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1821 type_register(&type_info);
1823 /* Register generic family CPU class for a family */
1824 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
1825 dc = DEVICE_CLASS(pvr_pcc);
1826 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1827 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
1828 type_register(&type_info);
1830 return 0;
1833 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1835 struct kvm_rtas_token_args args = {
1836 .token = token,
1839 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1840 return -ENOENT;
1843 strncpy(args.name, function, sizeof(args.name));
1845 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1848 int kvmppc_get_htab_fd(bool write)
1850 struct kvm_get_htab_fd s = {
1851 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1852 .start_index = 0,
1855 if (!cap_htab_fd) {
1856 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1857 return -1;
1860 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1863 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1865 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1866 uint8_t buf[bufsize];
1867 ssize_t rc;
1869 do {
1870 rc = read(fd, buf, bufsize);
1871 if (rc < 0) {
1872 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1873 strerror(errno));
1874 return rc;
1875 } else if (rc) {
1876 /* Kernel already returns data in BE format for the file */
1877 qemu_put_buffer(f, buf, rc);
1879 } while ((rc != 0)
1880 && ((max_ns < 0)
1881 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1883 return (rc == 0) ? 1 : 0;
1886 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1887 uint16_t n_valid, uint16_t n_invalid)
1889 struct kvm_get_htab_header *buf;
1890 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
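/* (With HASH_PTE_SIZE_64 == 16 and the 8-byte kvm_get_htab_header, a chunk
 * restoring a full PTEG, n_valid == 8, is 8 + 8 * 16 == 136 bytes.) */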
1891 ssize_t rc;
1893 buf = alloca(chunksize);
1894 /* This is KVM on ppc, so this is all big-endian */
1895 buf->index = index;
1896 buf->n_valid = n_valid;
1897 buf->n_invalid = n_invalid;
1899 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1901 rc = write(fd, buf, chunksize);
1902 if (rc < 0) {
1903 fprintf(stderr, "Error writing KVM hash table: %s\n",
1904 strerror(errno));
1905 return rc;
1907 if (rc != chunksize) {
1908 /* We should never get a short write on a single chunk */
1909 fprintf(stderr, "Short write, restoring KVM hash table\n");
1910 return -1;
1912 return 0;
1915 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1917 return true;
1920 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1922 return 1;
1925 int kvm_arch_on_sigbus(int code, void *addr)
1927 return 1;
1930 void kvm_arch_init_irq_routing(KVMState *s)
1934 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1936 return -EINVAL;
1939 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1941 return -EINVAL;
1944 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1946 return -EINVAL;
1949 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1951 return -EINVAL;
1954 void kvm_arch_remove_all_hw_breakpoints(void)
1958 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1962 struct kvm_get_htab_buf {
1963 struct kvm_get_htab_header header;
1965 * We require one extra entry for the read
1967 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
1970 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1972 int htab_fd;
1973 struct kvm_get_htab_fd ghf;
1974 struct kvm_get_htab_buf *hpte_buf;
1976 ghf.flags = 0;
1977 ghf.start_index = pte_index;
1978 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1979 if (htab_fd < 0) {
1980 goto error_out;
1983 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1985 * Read the hpte group
1987 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1988 goto out_close;
1991 close(htab_fd);
1992 return (uint64_t)(uintptr_t) hpte_buf->hpte;
1994 out_close:
1995 g_free(hpte_buf);
1996 close(htab_fd);
1997 error_out:
1998 return 0;
2001 void kvmppc_hash64_free_pteg(uint64_t token)
2003 struct kvm_get_htab_buf *htab_buf;
2005 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2006 hpte);
2007 g_free(htab_buf);
2008 return;
2011 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2012 target_ulong pte0, target_ulong pte1)
2014 int htab_fd;
2015 struct kvm_get_htab_fd ghf;
2016 struct kvm_get_htab_buf hpte_buf;
2018 ghf.flags = 0;
2019 ghf.start_index = 0; /* Ignored */
2020 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2021 if (htab_fd < 0) {
2022 goto error_out;
2025 hpte_buf.header.n_valid = 1;
2026 hpte_buf.header.n_invalid = 0;
2027 hpte_buf.header.index = pte_index;
2028 hpte_buf.hpte[0] = pte0;
2029 hpte_buf.hpte[1] = pte1;
2031 * Write the hpte entry.
2032 * CAUTION: write() has the warn_unused_result attribute. Hence we
2033 * need to check the return value, even though we do nothing.
2035 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2036 goto out_close;
2039 out_close:
2040 close(htab_fd);
2041 return;
2043 error_out:
2044 return;