1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
39 #include "trace.h"
41 //#define DEBUG_KVM
43 #ifdef DEBUG_KVM
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #else
47 #define DPRINTF(fmt, ...) \
48 do { } while (0)
49 #endif
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
54 KVM_CAP_LAST_INFO
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
64 static int cap_hior;
65 static int cap_one_reg;
66 static int cap_epr;
67 static int cap_ppc_watchdog;
68 static int cap_papr;
69 static int cap_htab_fd;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can ensure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
80 static QEMUTimer *idle_timer;
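/* Timer callback for the workaround above: kick the vCPU so it re-enters
 * KVM and notices any interrupt that is still pending. */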
82 static void kvm_kick_cpu(void *opaque)
84 PowerPCCPU *cpu = opaque;
86 qemu_cpu_kick(CPU(cpu));
89 static int kvm_ppc_register_host_cpu_type(void);
91 int kvm_arch_init(KVMState *s)
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
108 if (!cap_interrupt_level) {
109 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
115 return 0;
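/* Tell KVM the guest PVR via KVM_SET_SREGS so the vCPU reports the right
 * processor version (Book3S only; BookE keeps the native PVR for now). */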
118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
120 CPUPPCState *cenv = &cpu->env;
121 CPUState *cs = CPU(cpu);
122 struct kvm_sregs sregs;
123 int ret;
125 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users into thinking they can
129 run BookE guests on BookS. Let's hope nobody dares enough :) */
130 return 0;
131 } else {
132 if (!cap_segstate) {
133 fprintf(stderr, "kvm error: missing PVR setting capability\n");
134 return -ENOSYS;
138 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
139 if (ret) {
140 return ret;
143 sregs.pvr = cenv->spr[SPR_PVR];
144 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
147 /* Set up a shared TLB array with KVM */
148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
150 CPUPPCState *env = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_book3e_206_tlb_params params = {};
153 struct kvm_config_tlb cfg = {};
154 struct kvm_enable_cap encap = {};
155 unsigned int entries = 0;
156 int ret, i;
158 if (!kvm_enabled() ||
159 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
160 return 0;
163 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
165 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
166 params.tlb_sizes[i] = booke206_tlb_size(env, i);
167 params.tlb_ways[i] = booke206_tlb_ways(env, i);
168 entries += params.tlb_sizes[i];
171 assert(entries == env->nb_tlb);
172 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
174 env->tlb_dirty = true;
176 cfg.array = (uintptr_t)env->tlb.tlbm;
177 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
178 cfg.params = (uintptr_t)&params;
179 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
181 encap.cap = KVM_CAP_SW_TLB;
182 encap.args[0] = (uintptr_t)&cfg;
184 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
185 if (ret < 0) {
186 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
187 __func__, strerror(-ret));
188 return ret;
191 env->kvm_sw_tlb = true;
192 return 0;
196 #if defined(TARGET_PPC64)
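/* Fallback for kernels without KVM_PPC_GET_SMMU_INFO: synthesize a
 * conservative set of segment/page sizes based on the assumptions below. */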
197 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
198 struct kvm_ppc_smmu_info *info)
200 CPUPPCState *env = &cpu->env;
201 CPUState *cs = CPU(cpu);
203 memset(info, 0, sizeof(*info));
205 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
206 * we need to "guess" what the supported page sizes are.
208 * For that to work we make a few assumptions:
210 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
211 * KVM which only supports 4K and 16M pages, but supports them
212 * regardless of the backing store characteristics. We also don't
213 * support 1T segments.
215 * This is safe because if HV KVM ever supports that capability or PR
216 * KVM grows support for more page/segment sizes, those versions
217 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
218 * will not hit this fallback
220 * - Else we are running HV KVM. This means we only support page
221 * sizes that fit in the backing store. Additionally we only
222 * advertise 64K pages if the processor is ARCH 2.06 and we assume
223 * P7 encodings for the SLB and hash table. Here too, we assume
224 * support for any newer processor will mean a kernel that
225 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
226 * this fallback.
228 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
229 /* No flags */
230 info->flags = 0;
231 info->slb_size = 64;
233 /* Standard 4k base page size segment */
234 info->sps[0].page_shift = 12;
235 info->sps[0].slb_enc = 0;
236 info->sps[0].enc[0].page_shift = 12;
237 info->sps[0].enc[0].pte_enc = 0;
239 /* Standard 16M large page size segment */
240 info->sps[1].page_shift = 24;
241 info->sps[1].slb_enc = SLB_VSID_L;
242 info->sps[1].enc[0].page_shift = 24;
243 info->sps[1].enc[0].pte_enc = 0;
244 } else {
245 int i = 0;
247 /* HV KVM has backing store size restrictions */
248 info->flags = KVM_PPC_PAGE_SIZES_REAL;
250 if (env->mmu_model & POWERPC_MMU_1TSEG) {
251 info->flags |= KVM_PPC_1T_SEGMENTS;
254 if (env->mmu_model == POWERPC_MMU_2_06) {
255 info->slb_size = 32;
256 } else {
257 info->slb_size = 64;
260 /* Standard 4k base page size segment */
261 info->sps[i].page_shift = 12;
262 info->sps[i].slb_enc = 0;
263 info->sps[i].enc[0].page_shift = 12;
264 info->sps[i].enc[0].pte_enc = 0;
265 i++;
267 /* 64K on MMU 2.06 */
268 if (env->mmu_model == POWERPC_MMU_2_06) {
269 info->sps[i].page_shift = 16;
270 info->sps[i].slb_enc = 0x110;
271 info->sps[i].enc[0].page_shift = 16;
272 info->sps[i].enc[0].pte_enc = 1;
273 i++;
276 /* Standard 16M large page size segment */
277 info->sps[i].page_shift = 24;
278 info->sps[i].slb_enc = SLB_VSID_L;
279 info->sps[i].enc[0].page_shift = 24;
280 info->sps[i].enc[0].pte_enc = 0;
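/* Query the kernel for the supported MMU geometry, using the hand-built
 * table above when KVM_PPC_GET_SMMU_INFO is unavailable. */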
284 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
286 CPUState *cs = CPU(cpu);
287 int ret;
289 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
290 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
291 if (ret == 0) {
292 return;
296 kvm_get_fallback_smmu_info(cpu, info);
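/* Page size backing guest RAM: the hugepage size when -mem-path points at
 * hugetlbfs, the normal host page size otherwise. */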
299 static long getrampagesize(void)
301 struct statfs fs;
302 int ret;
304 if (!mem_path) {
305 /* guest RAM is backed by normal anonymous pages */
306 return getpagesize();
309 do {
310 ret = statfs(mem_path, &fs);
311 } while (ret != 0 && errno == EINTR);
313 if (ret != 0) {
314 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
315 strerror(errno));
316 exit(1);
319 #define HUGETLBFS_MAGIC 0x958458f6
321 if (fs.f_type != HUGETLBFS_MAGIC) {
322 /* Explicit mempath, but it's ordinary pages */
323 return getpagesize();
326 /* It's a hugepage, so return the huge page size */
327 return fs.f_bsize;
330 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
332 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
333 return true;
336 return (1ul << shift) <= rampgsize;
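/* Filter the kernel-advertised segment/page sizes down to those usable
 * with the current RAM backing and store the result in env->sps. */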
339 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
341 static struct kvm_ppc_smmu_info smmu_info;
342 static bool has_smmu_info;
343 CPUPPCState *env = &cpu->env;
344 long rampagesize;
345 int iq, ik, jq, jk;
347 /* We only handle page sizes for 64-bit server guests for now */
348 if (!(env->mmu_model & POWERPC_MMU_64)) {
349 return;
352 /* Collect MMU info from kernel if not already */
353 if (!has_smmu_info) {
354 kvm_get_smmu_info(cpu, &smmu_info);
355 has_smmu_info = true;
358 rampagesize = getrampagesize();
360 /* Convert to QEMU form */
361 memset(&env->sps, 0, sizeof(env->sps));
363 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
364 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
365 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
367 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
368 ksps->page_shift)) {
369 continue;
371 qsps->page_shift = ksps->page_shift;
372 qsps->slb_enc = ksps->slb_enc;
373 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
374 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
375 ksps->enc[jk].page_shift)) {
376 continue;
378 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
379 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
380 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
381 break;
384 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
385 break;
388 env->slb_nr = smmu_info.slb_size;
389 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
390 env->mmu_model |= POWERPC_MMU_1TSEG;
391 } else {
392 env->mmu_model &= ~POWERPC_MMU_1TSEG;
395 #else /* defined (TARGET_PPC64) */
397 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
401 #endif /* !defined (TARGET_PPC64) */
403 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
405 return cpu->cpu_index;
408 int kvm_arch_init_vcpu(CPUState *cs)
410 PowerPCCPU *cpu = POWERPC_CPU(cs);
411 CPUPPCState *cenv = &cpu->env;
412 int ret;
414 /* Gather server mmu info from KVM and update the CPU state */
415 kvm_fixup_page_sizes(cpu);
417 /* Synchronize sregs with kvm */
418 ret = kvm_arch_sync_sregs(cpu);
419 if (ret) {
420 return ret;
423 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
425 /* Some targets support access to KVM's guest TLB. */
426 switch (cenv->mmu_model) {
427 case POWERPC_MMU_BOOKE206:
428 ret = kvm_booke206_tlb_init(cpu);
429 break;
430 default:
431 break;
434 return ret;
437 void kvm_arch_reset_vcpu(CPUState *cpu)
441 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
443 CPUPPCState *env = &cpu->env;
444 CPUState *cs = CPU(cpu);
445 struct kvm_dirty_tlb dirty_tlb;
446 unsigned char *bitmap;
447 int ret;
449 if (!env->kvm_sw_tlb) {
450 return;
453 bitmap = g_malloc((env->nb_tlb + 7) / 8);
454 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
456 dirty_tlb.bitmap = (uintptr_t)bitmap;
457 dirty_tlb.num_dirty = env->nb_tlb;
459 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
460 if (ret) {
461 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
462 __func__, strerror(-ret));
465 g_free(bitmap);
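/* Accessors for individual SPRs via KVM's ONE_REG interface; the register
 * width is encoded in the id. Failures are traced, not treated as fatal. */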
468 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
470 PowerPCCPU *cpu = POWERPC_CPU(cs);
471 CPUPPCState *env = &cpu->env;
472 union {
473 uint32_t u32;
474 uint64_t u64;
475 } val;
476 struct kvm_one_reg reg = {
477 .id = id,
478 .addr = (uintptr_t) &val,
480 int ret;
482 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
483 if (ret != 0) {
484 trace_kvm_failed_spr_get(spr, strerror(errno));
485 } else {
486 switch (id & KVM_REG_SIZE_MASK) {
487 case KVM_REG_SIZE_U32:
488 env->spr[spr] = val.u32;
489 break;
491 case KVM_REG_SIZE_U64:
492 env->spr[spr] = val.u64;
493 break;
495 default:
496 /* Don't handle this size yet */
497 abort();
502 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
504 PowerPCCPU *cpu = POWERPC_CPU(cs);
505 CPUPPCState *env = &cpu->env;
506 union {
507 uint32_t u32;
508 uint64_t u64;
509 } val;
510 struct kvm_one_reg reg = {
511 .id = id,
512 .addr = (uintptr_t) &val,
514 int ret;
516 switch (id & KVM_REG_SIZE_MASK) {
517 case KVM_REG_SIZE_U32:
518 val.u32 = env->spr[spr];
519 break;
521 case KVM_REG_SIZE_U64:
522 val.u64 = env->spr[spr];
523 break;
525 default:
526 /* Don't handle this size yet */
527 abort();
530 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
531 if (ret != 0) {
532 trace_kvm_failed_spr_set(spr, strerror(errno));
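/* Copy the FP/VSX and Altivec register file to KVM, one register at a
 * time via ONE_REG. VSRs are 128 bit; the FPR contents occupy the first
 * doubleword of each. */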
536 static int kvm_put_fp(CPUState *cs)
538 PowerPCCPU *cpu = POWERPC_CPU(cs);
539 CPUPPCState *env = &cpu->env;
540 struct kvm_one_reg reg;
541 int i;
542 int ret;
544 if (env->insns_flags & PPC_FLOAT) {
545 uint64_t fpscr = env->fpscr;
546 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
548 reg.id = KVM_REG_PPC_FPSCR;
549 reg.addr = (uintptr_t)&fpscr;
550 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
551 if (ret < 0) {
552 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
553 return ret;
556 for (i = 0; i < 32; i++) {
557 uint64_t vsr[2];
559 vsr[0] = float64_val(env->fpr[i]);
560 vsr[1] = env->vsr[i];
561 reg.addr = (uintptr_t) &vsr;
562 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
564 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
565 if (ret < 0) {
566 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
567 i, strerror(errno));
568 return ret;
573 if (env->insns_flags & PPC_ALTIVEC) {
574 reg.id = KVM_REG_PPC_VSCR;
575 reg.addr = (uintptr_t)&env->vscr;
576 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
577 if (ret < 0) {
578 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
579 return ret;
582 for (i = 0; i < 32; i++) {
583 reg.id = KVM_REG_PPC_VR(i);
584 reg.addr = (uintptr_t)&env->avr[i];
585 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
586 if (ret < 0) {
587 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
588 return ret;
593 return 0;
596 static int kvm_get_fp(CPUState *cs)
598 PowerPCCPU *cpu = POWERPC_CPU(cs);
599 CPUPPCState *env = &cpu->env;
600 struct kvm_one_reg reg;
601 int i;
602 int ret;
604 if (env->insns_flags & PPC_FLOAT) {
605 uint64_t fpscr;
606 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
608 reg.id = KVM_REG_PPC_FPSCR;
609 reg.addr = (uintptr_t)&fpscr;
610 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
611 if (ret < 0) {
612 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
613 return ret;
614 } else {
615 env->fpscr = fpscr;
618 for (i = 0; i < 32; i++) {
619 uint64_t vsr[2];
621 reg.addr = (uintptr_t) &vsr;
622 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
624 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
625 if (ret < 0) {
626 DPRINTF("Unable to get %s%d from KVM: %s\n",
627 vsx ? "VSR" : "FPR", i, strerror(errno));
628 return ret;
629 } else {
630 env->fpr[i] = vsr[0];
631 if (vsx) {
632 env->vsr[i] = vsr[1];
638 if (env->insns_flags & PPC_ALTIVEC) {
639 reg.id = KVM_REG_PPC_VSCR;
640 reg.addr = (uintptr_t)&env->vscr;
641 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
642 if (ret < 0) {
643 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
644 return ret;
647 for (i = 0; i < 32; i++) {
648 reg.id = KVM_REG_PPC_VR(i);
649 reg.addr = (uintptr_t)&env->avr[i];
650 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
651 if (ret < 0) {
652 DPRINTF("Unable to get VR%d from KVM: %s\n",
653 i, strerror(errno));
654 return ret;
659 return 0;
662 #if defined(TARGET_PPC64)
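/* PAPR guests: fetch the VPA, SLB shadow and dispatch trace log
 * registration state from KVM. The asserts match KVM's packed layout. */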
663 static int kvm_get_vpa(CPUState *cs)
665 PowerPCCPU *cpu = POWERPC_CPU(cs);
666 CPUPPCState *env = &cpu->env;
667 struct kvm_one_reg reg;
668 int ret;
670 reg.id = KVM_REG_PPC_VPA_ADDR;
671 reg.addr = (uintptr_t)&env->vpa_addr;
672 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
673 if (ret < 0) {
674 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
675 return ret;
678 assert((uintptr_t)&env->slb_shadow_size
679 == ((uintptr_t)&env->slb_shadow_addr + 8));
680 reg.id = KVM_REG_PPC_VPA_SLB;
681 reg.addr = (uintptr_t)&env->slb_shadow_addr;
682 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
683 if (ret < 0) {
684 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
685 strerror(errno));
686 return ret;
689 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
690 reg.id = KVM_REG_PPC_VPA_DTL;
691 reg.addr = (uintptr_t)&env->dtl_addr;
692 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
693 if (ret < 0) {
694 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
695 strerror(errno));
696 return ret;
699 return 0;
702 static int kvm_put_vpa(CPUState *cs)
704 PowerPCCPU *cpu = POWERPC_CPU(cs);
705 CPUPPCState *env = &cpu->env;
706 struct kvm_one_reg reg;
707 int ret;
709 /* SLB shadow or DTL can't be registered unless a master VPA is
710 * registered. That means when restoring state, if a VPA *is*
711 * registered, we need to set that up first. If not, we need to
712 * deregister the others before deregistering the master VPA */
713 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
715 if (env->vpa_addr) {
716 reg.id = KVM_REG_PPC_VPA_ADDR;
717 reg.addr = (uintptr_t)&env->vpa_addr;
718 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
719 if (ret < 0) {
720 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
721 return ret;
725 assert((uintptr_t)&env->slb_shadow_size
726 == ((uintptr_t)&env->slb_shadow_addr + 8));
727 reg.id = KVM_REG_PPC_VPA_SLB;
728 reg.addr = (uintptr_t)&env->slb_shadow_addr;
729 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
730 if (ret < 0) {
731 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
732 return ret;
735 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
736 reg.id = KVM_REG_PPC_VPA_DTL;
737 reg.addr = (uintptr_t)&env->dtl_addr;
738 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
739 if (ret < 0) {
740 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
741 strerror(errno));
742 return ret;
745 if (!env->vpa_addr) {
746 reg.id = KVM_REG_PPC_VPA_ADDR;
747 reg.addr = (uintptr_t)&env->vpa_addr;
748 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
749 if (ret < 0) {
750 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
751 return ret;
755 return 0;
757 #endif /* TARGET_PPC64 */
759 int kvm_arch_put_registers(CPUState *cs, int level)
761 PowerPCCPU *cpu = POWERPC_CPU(cs);
762 CPUPPCState *env = &cpu->env;
763 struct kvm_regs regs;
764 int ret;
765 int i;
767 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
768 if (ret < 0) {
769 return ret;
772 regs.ctr = env->ctr;
773 regs.lr = env->lr;
774 regs.xer = cpu_read_xer(env);
775 regs.msr = env->msr;
776 regs.pc = env->nip;
778 regs.srr0 = env->spr[SPR_SRR0];
779 regs.srr1 = env->spr[SPR_SRR1];
781 regs.sprg0 = env->spr[SPR_SPRG0];
782 regs.sprg1 = env->spr[SPR_SPRG1];
783 regs.sprg2 = env->spr[SPR_SPRG2];
784 regs.sprg3 = env->spr[SPR_SPRG3];
785 regs.sprg4 = env->spr[SPR_SPRG4];
786 regs.sprg5 = env->spr[SPR_SPRG5];
787 regs.sprg6 = env->spr[SPR_SPRG6];
788 regs.sprg7 = env->spr[SPR_SPRG7];
790 regs.pid = env->spr[SPR_BOOKE_PID];
792 for (i = 0;i < 32; i++)
793 regs.gpr[i] = env->gpr[i];
795 regs.cr = 0;
796 for (i = 0; i < 8; i++) {
797 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
800 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
801 if (ret < 0)
802 return ret;
804 kvm_put_fp(cs);
806 if (env->tlb_dirty) {
807 kvm_sw_tlb_put(cpu);
808 env->tlb_dirty = false;
811 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
812 struct kvm_sregs sregs;
814 sregs.pvr = env->spr[SPR_PVR];
816 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
818 /* Sync SLB */
819 #ifdef TARGET_PPC64
820 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
821 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
822 if (env->slb[i].esid & SLB_ESID_V) {
823 sregs.u.s.ppc64.slb[i].slbe |= i;
825 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
827 #endif
829 /* Sync SRs */
830 for (i = 0; i < 16; i++) {
831 sregs.u.s.ppc32.sr[i] = env->sr[i];
834 /* Sync BATs */
835 for (i = 0; i < 8; i++) {
836 /* Beware. We have to swap upper and lower bits here */
837 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
838 | env->DBAT[1][i];
839 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
840 | env->IBAT[1][i];
843 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
844 if (ret) {
845 return ret;
849 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
850 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
853 if (cap_one_reg) {
854 int i;
856 /* We deliberately ignore errors here: for kernels which have
857 * the ONE_REG calls but don't support the specific
858 * registers, there's a reasonable chance things will still
859 * work, at least until we try to migrate. */
860 for (i = 0; i < 1024; i++) {
861 uint64_t id = env->spr_cb[i].one_reg_id;
863 if (id != 0) {
864 kvm_put_one_spr(cs, id, i);
868 #ifdef TARGET_PPC64
869 if (cap_papr) {
870 if (kvm_put_vpa(cs) < 0) {
871 DPRINTF("Warning: Unable to set VPA information to KVM\n");
874 #endif /* TARGET_PPC64 */
877 return ret;
880 int kvm_arch_get_registers(CPUState *cs)
882 PowerPCCPU *cpu = POWERPC_CPU(cs);
883 CPUPPCState *env = &cpu->env;
884 struct kvm_regs regs;
885 struct kvm_sregs sregs;
886 uint32_t cr;
887 int i, ret;
889 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
890 if (ret < 0)
891 return ret;
893 cr = regs.cr;
894 for (i = 7; i >= 0; i--) {
895 env->crf[i] = cr & 15;
896 cr >>= 4;
899 env->ctr = regs.ctr;
900 env->lr = regs.lr;
901 cpu_write_xer(env, regs.xer);
902 env->msr = regs.msr;
903 env->nip = regs.pc;
905 env->spr[SPR_SRR0] = regs.srr0;
906 env->spr[SPR_SRR1] = regs.srr1;
908 env->spr[SPR_SPRG0] = regs.sprg0;
909 env->spr[SPR_SPRG1] = regs.sprg1;
910 env->spr[SPR_SPRG2] = regs.sprg2;
911 env->spr[SPR_SPRG3] = regs.sprg3;
912 env->spr[SPR_SPRG4] = regs.sprg4;
913 env->spr[SPR_SPRG5] = regs.sprg5;
914 env->spr[SPR_SPRG6] = regs.sprg6;
915 env->spr[SPR_SPRG7] = regs.sprg7;
917 env->spr[SPR_BOOKE_PID] = regs.pid;
919 for (i = 0;i < 32; i++)
920 env->gpr[i] = regs.gpr[i];
922 kvm_get_fp(cs);
924 if (cap_booke_sregs) {
925 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
926 if (ret < 0) {
927 return ret;
930 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
931 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
932 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
933 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
934 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
935 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
936 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
937 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
938 env->spr[SPR_DECR] = sregs.u.e.dec;
939 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
940 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
941 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
944 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
945 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
946 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
947 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
948 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
949 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
952 if (sregs.u.e.features & KVM_SREGS_E_64) {
953 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
956 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
957 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
960 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
961 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
962 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
963 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
964 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
965 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
966 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
967 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
968 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
969 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
970 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
971 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
972 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
973 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
974 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
975 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
976 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
978 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
979 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
980 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
981 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
984 if (sregs.u.e.features & KVM_SREGS_E_PM) {
985 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
988 if (sregs.u.e.features & KVM_SREGS_E_PC) {
989 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
990 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
994 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
995 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
996 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
997 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
998 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
999 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1000 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1001 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1002 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1003 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1004 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1007 if (sregs.u.e.features & KVM_SREGS_EXP) {
1008 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1011 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1012 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1013 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1016 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1017 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1018 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1019 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1021 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1022 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1023 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1028 if (cap_segstate) {
1029 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1030 if (ret < 0) {
1031 return ret;
1034 ppc_store_sdr1(env, sregs.u.s.sdr1);
1036 /* Sync SLB */
1037 #ifdef TARGET_PPC64
1039 * The packed SLB array we get from KVM_GET_SREGS only contains
1040 * information about valid entries. So we flush our internal
1041 * copy to get rid of stale ones, then put all valid SLB entries
1042 * back in.
1044 memset(env->slb, 0, sizeof(env->slb));
1045 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1046 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1047 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1049 * Only restore valid entries
1051 if (rb & SLB_ESID_V) {
1052 ppc_store_slb(env, rb, rs);
1055 #endif
1057 /* Sync SRs */
1058 for (i = 0; i < 16; i++) {
1059 env->sr[i] = sregs.u.s.ppc32.sr[i];
1062 /* Sync BATs */
1063 for (i = 0; i < 8; i++) {
1064 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1065 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1066 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1067 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1071 if (cap_hior) {
1072 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1075 if (cap_one_reg) {
1076 int i;
1078 /* We deliberately ignore errors here: for kernels which have
1079 * the ONE_REG calls but don't support the specific
1080 * registers, there's a reasonable chance things will still
1081 * work, at least until we try to migrate. */
1082 for (i = 0; i < 1024; i++) {
1083 uint64_t id = env->spr_cb[i].one_reg_id;
1085 if (id != 0) {
1086 kvm_get_one_spr(cs, id, i);
1090 #ifdef TARGET_PPC64
1091 if (cap_papr) {
1092 if (kvm_get_vpa(cs) < 0) {
1093 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1096 #endif
1099 return 0;
1102 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1104 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1106 if (irq != PPC_INTERRUPT_EXT) {
1107 return 0;
1110 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1111 return 0;
1114 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1116 return 0;
1119 #if defined(TARGET_PPCEMB)
1120 #define PPC_INPUT_INT PPC40x_INPUT_INT
1121 #elif defined(TARGET_PPC64)
1122 #define PPC_INPUT_INT PPC970_INPUT_INT
1123 #else
1124 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1125 #endif
1127 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1129 PowerPCCPU *cpu = POWERPC_CPU(cs);
1130 CPUPPCState *env = &cpu->env;
1131 int r;
1132 unsigned irq;
1134 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1135 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1136 if (!cap_interrupt_level &&
1137 run->ready_for_interrupt_injection &&
1138 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1139 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1141 /* For now KVM disregards the 'irq' argument. However, in the
1142 * future KVM could cache it in-kernel to avoid a heavyweight exit
1143 * when reading the UIC.
1145 irq = KVM_INTERRUPT_SET;
1147 DPRINTF("injected interrupt %d\n", irq);
1148 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1149 if (r < 0) {
1150 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1153 /* Always wake up soon in case the interrupt was level based */
1154 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1155 (get_ticks_per_sec() / 50));
1158 /* We don't know if there are more interrupts pending after this. However,
1159 * the guest will return to userspace in the course of handling this one
1160 * anyways, so we will get a chance to deliver the rest. */
1163 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1167 int kvm_arch_process_async_events(CPUState *cs)
1169 return cs->halted;
1172 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1174 CPUState *cs = CPU(cpu);
1175 CPUPPCState *env = &cpu->env;
1177 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1178 cs->halted = 1;
1179 env->exception_index = EXCP_HLT;
1182 return 0;
1185 /* map dcr access to existing qemu dcr emulation */
1186 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1188 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1189 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1191 return 0;
1194 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1196 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1197 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1199 return 0;
1202 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1204 PowerPCCPU *cpu = POWERPC_CPU(cs);
1205 CPUPPCState *env = &cpu->env;
1206 int ret;
1208 switch (run->exit_reason) {
1209 case KVM_EXIT_DCR:
1210 if (run->dcr.is_write) {
1211 DPRINTF("handle dcr write\n");
1212 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1213 } else {
1214 DPRINTF("handle dcr read\n");
1215 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1217 break;
1218 case KVM_EXIT_HLT:
1219 DPRINTF("handle halt\n");
1220 ret = kvmppc_handle_halt(cpu);
1221 break;
1222 #if defined(TARGET_PPC64)
1223 case KVM_EXIT_PAPR_HCALL:
1224 DPRINTF("handle PAPR hypercall\n");
1225 run->papr_hcall.ret = spapr_hypercall(cpu,
1226 run->papr_hcall.nr,
1227 run->papr_hcall.args);
1228 ret = 0;
1229 break;
1230 #endif
1231 case KVM_EXIT_EPR:
1232 DPRINTF("handle epr\n");
1233 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1234 ret = 0;
1235 break;
1236 case KVM_EXIT_WATCHDOG:
1237 DPRINTF("handle watchdog expiry\n");
1238 watchdog_perform_action();
1239 ret = 0;
1240 break;
1242 default:
1243 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1244 ret = -1;
1245 break;
1248 return ret;
1251 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1253 CPUState *cs = CPU(cpu);
1254 uint32_t bits = tsr_bits;
1255 struct kvm_one_reg reg = {
1256 .id = KVM_REG_PPC_OR_TSR,
1257 .addr = (uintptr_t) &bits,
1260 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1263 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1266 CPUState *cs = CPU(cpu);
1267 uint32_t bits = tsr_bits;
1268 struct kvm_one_reg reg = {
1269 .id = KVM_REG_PPC_CLEAR_TSR,
1270 .addr = (uintptr_t) &bits,
1273 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1276 int kvmppc_set_tcr(PowerPCCPU *cpu)
1278 CPUState *cs = CPU(cpu);
1279 CPUPPCState *env = &cpu->env;
1280 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1282 struct kvm_one_reg reg = {
1283 .id = KVM_REG_PPC_TCR,
1284 .addr = (uintptr_t) &tcr,
1287 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1290 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1292 CPUState *cs = CPU(cpu);
1293 struct kvm_enable_cap encap = {};
1294 int ret;
1296 if (!kvm_enabled()) {
1297 return -1;
1300 if (!cap_ppc_watchdog) {
1301 printf("warning: KVM does not support watchdog");
1302 return -1;
1305 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1306 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1307 if (ret < 0) {
1308 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1309 __func__, strerror(-ret));
1310 return ret;
1313 return ret;
1316 static int read_cpuinfo(const char *field, char *value, int len)
1318 FILE *f;
1319 int ret = -1;
1320 int field_len = strlen(field);
1321 char line[512];
1323 f = fopen("/proc/cpuinfo", "r");
1324 if (!f) {
1325 return -1;
1328 do {
1329 if(!fgets(line, sizeof(line), f)) {
1330 break;
1332 if (!strncmp(line, field, field_len)) {
1333 pstrcpy(value, len, line);
1334 ret = 0;
1335 break;
1337 } while(*line);
1339 fclose(f);
1341 return ret;
1344 uint32_t kvmppc_get_tbfreq(void)
1346 char line[512];
1347 char *ns;
1348 uint32_t retval = get_ticks_per_sec();
1350 if (read_cpuinfo("timebase", line, sizeof(line))) {
1351 return retval;
1354 if (!(ns = strchr(line, ':'))) {
1355 return retval;
1358 ns++;
1360 retval = atoi(ns);
1361 return retval;
1364 /* Try to find a device tree node for a CPU with a clock-frequency property */
1365 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1367 struct dirent *dirp;
1368 DIR *dp;
1370 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1371 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1372 return -1;
1375 buf[0] = '\0';
1376 while ((dirp = readdir(dp)) != NULL) {
1377 FILE *f;
1378 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1379 dirp->d_name);
1380 f = fopen(buf, "r");
1381 if (f) {
1382 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1383 fclose(f);
1384 break;
1386 buf[0] = '\0';
1388 closedir(dp);
1389 if (buf[0] == '\0') {
1390 printf("Unknown host!\n");
1391 return -1;
1394 return 0;
1397 /* Read a CPU node property from the host device tree that's a single
1398 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1399 * (can't find or open the property, or doesn't understand the
1400 * format) */
1401 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1403 char buf[PATH_MAX];
1404 union {
1405 uint32_t v32;
1406 uint64_t v64;
1407 } u;
1408 FILE *f;
1409 int len;
1411 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1412 return -1;
1415 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1416 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1418 f = fopen(buf, "rb");
1419 if (!f) {
1420 return -1;
1423 len = fread(&u, 1, sizeof(u), f);
1424 fclose(f);
1425 switch (len) {
1426 case 4:
1427 /* property is a 32-bit quantity */
1428 return be32_to_cpu(u.v32);
1429 case 8:
1430 return be64_to_cpu(u.v64);
1433 return 0;
1436 uint64_t kvmppc_get_clockfreq(void)
1438 return kvmppc_read_int_cpu_dt("clock-frequency");
1441 uint32_t kvmppc_get_vmx(void)
1443 return kvmppc_read_int_cpu_dt("ibm,vmx");
1446 uint32_t kvmppc_get_dfp(void)
1448 return kvmppc_read_int_cpu_dt("ibm,dfp");
1451 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1453 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1454 CPUState *cs = CPU(cpu);
1456 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1457 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1458 return 0;
1461 return 1;
1464 int kvmppc_get_hasidle(CPUPPCState *env)
1466 struct kvm_ppc_pvinfo pvinfo;
1468 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1469 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1470 return 1;
1473 return 0;
1476 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1478 uint32_t *hc = (uint32_t*)buf;
1479 struct kvm_ppc_pvinfo pvinfo;
1481 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1482 memcpy(buf, pvinfo.hcall, buf_len);
1483 return 0;
1487 * Fallback to always fail hypercalls:
1489 * li r3, -1
1490 * nop
1491 * nop
1492 * nop
1495 hc[0] = 0x3860ffff;    /* li r3, -1 */
1496 hc[1] = 0x60000000;    /* nop */
1497 hc[2] = 0x60000000;    /* nop */
1498 hc[3] = 0x60000000;    /* nop */
1500 return 0;
1503 void kvmppc_set_papr(PowerPCCPU *cpu)
1505 CPUPPCState *env = &cpu->env;
1506 CPUState *cs = CPU(cpu);
1507 struct kvm_enable_cap cap = {};
1508 int ret;
1510 cap.cap = KVM_CAP_PPC_PAPR;
1511 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1513 if (ret) {
1514 cpu_abort(env, "This KVM version does not support PAPR\n");
1517 /* Update the capability flag so we sync the right information
1518 * with kvm */
1519 cap_papr = 1;
1522 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1524 CPUPPCState *env = &cpu->env;
1525 CPUState *cs = CPU(cpu);
1526 struct kvm_enable_cap cap = {};
1527 int ret;
1529 cap.cap = KVM_CAP_PPC_EPR;
1530 cap.args[0] = mpic_proxy;
1531 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1533 if (ret && mpic_proxy) {
1534 cpu_abort(env, "This KVM version does not support EPR\n");
1538 int kvmppc_smt_threads(void)
1540 return cap_ppc_smt ? cap_ppc_smt : 1;
1543 #ifdef TARGET_PPC64
1544 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1546 void *rma;
1547 off_t size;
1548 int fd;
1549 struct kvm_allocate_rma ret;
1550 MemoryRegion *rma_region;
1552 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1553 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1554 * not necessary on this hardware
1555 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1557 * FIXME: We should allow the user to force contiguous RMA
1558 * allocation in the cap_ppc_rma==1 case.
1560 if (cap_ppc_rma < 2) {
1561 return 0;
1564 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1565 if (fd < 0) {
1566 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1567 strerror(errno));
1568 return -1;
1571 size = MIN(ret.rma_size, 256ul << 20);
1573 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1574 if (rma == MAP_FAILED) {
1575 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1576 return -1;
1579 rma_region = g_new(MemoryRegion, 1);
1580 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1581 vmstate_register_ram_global(rma_region);
1582 memory_region_add_subregion(sysmem, 0, rma_region);
1584 return size;
1587 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1589 struct kvm_ppc_smmu_info info;
1590 long rampagesize, best_page_shift;
1591 int i;
1593 if (cap_ppc_rma >= 2) {
1594 return current_size;
1597 /* Find the largest hardware supported page size that's less than
1598 * or equal to the (logical) backing page size of guest RAM */
1599 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1600 rampagesize = getrampagesize();
1601 best_page_shift = 0;
1603 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1604 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1606 if (!sps->page_shift) {
1607 continue;
1610 if ((sps->page_shift > best_page_shift)
1611 && ((1UL << sps->page_shift) <= rampagesize)) {
1612 best_page_shift = sps->page_shift;
1616 return MIN(current_size,
1617 1ULL << (best_page_shift + hash_shift - 7));
1619 #endif
1621 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1623 struct kvm_create_spapr_tce args = {
1624 .liobn = liobn,
1625 .window_size = window_size,
1627 long len;
1628 int fd;
1629 void *table;
1631 /* Must set fd to -1 so we don't try to munmap when called for
1632 * destroying the table, which the upper layers -will- do
1634 *pfd = -1;
1635 if (!cap_spapr_tce) {
1636 return NULL;
1639 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1640 if (fd < 0) {
1641 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1642 liobn);
1643 return NULL;
1646 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1647 /* FIXME: round this up to page size */
1649 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1650 if (table == MAP_FAILED) {
1651 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1652 liobn);
1653 close(fd);
1654 return NULL;
1657 *pfd = fd;
1658 return table;
1661 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1663 long len;
1665 if (fd < 0) {
1666 return -1;
1669 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1670 if ((munmap(table, len) < 0) ||
1671 (close(fd) < 0)) {
1672 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1673 strerror(errno));
1674 /* Leak the table */
1677 return 0;
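/* Ask the kernel to (re)allocate the guest hash page table. Returns the
 * htab shift actually used, or 0 when QEMU must allocate the htab itself
 * (full emulation or PR KVM). */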
1680 int kvmppc_reset_htab(int shift_hint)
1682 uint32_t shift = shift_hint;
1684 if (!kvm_enabled()) {
1685 /* Full emulation, tell caller to allocate htab itself */
1686 return 0;
1688 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1689 int ret;
1690 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1691 if (ret == -ENOTTY) {
1692 /* At least some versions of PR KVM advertise the
1693 * capability, but don't implement the ioctl(). Oops.
1694 * Return 0 so that we allocate the htab in qemu, as is
1695 * correct for PR. */
1696 return 0;
1697 } else if (ret < 0) {
1698 return ret;
1700 return shift;
1703 /* We have a kernel that predates the htab reset calls. For PR
1704 * KVM, we need to allocate the htab ourselves; an HV KVM of
1705 * this era has already allocated a fixed 16MB hash table.
1706 * Kernels of this era have the GET_PVINFO capability
1707 * only on PR, so we use this hack to determine the right
1708 * answer */
1709 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1710 /* PR - tell caller to allocate htab */
1711 return 0;
1712 } else {
1713 /* HV - assume 16MB kernel allocated htab */
1714 return 24;
1718 static inline uint32_t mfpvr(void)
1720 uint32_t pvr;
1722 asm ("mfpvr %0"
1723 : "=r"(pvr));
1724 return pvr;
1727 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1729 if (on) {
1730 *word |= flags;
1731 } else {
1732 *word &= ~flags;
1736 static void kvmppc_host_cpu_initfn(Object *obj)
1738 assert(kvm_enabled());
1741 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1743 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1744 uint32_t vmx = kvmppc_get_vmx();
1745 uint32_t dfp = kvmppc_get_dfp();
1746 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1747 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1749 /* Now fix up the class with information we can query from the host */
1750 pcc->pvr = mfpvr();
1752 if (vmx != -1) {
1753 /* Only override when we know what the host supports */
1754 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1755 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1757 if (dfp != -1) {
1758 /* Only override when we know what the host supports */
1759 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1762 if (dcache_size != -1) {
1763 pcc->l1_dcache_size = dcache_size;
1766 if (icache_size != -1) {
1767 pcc->l1_icache_size = icache_size;
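/* Space out vcpu ids so that each virtual core starts on a multiple of
 * the host's SMT thread count. */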
1771 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1773 CPUState *cs = CPU(cpu);
1774 int smt;
1776 /* Adjust cpu index for SMT */
1777 smt = kvmppc_smt_threads();
1778 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1779 + (cs->cpu_index % smp_threads);
1781 return 0;
1784 bool kvmppc_has_cap_epr(void)
1786 return cap_epr;
1789 static int kvm_ppc_register_host_cpu_type(void)
1791 TypeInfo type_info = {
1792 .name = TYPE_HOST_POWERPC_CPU,
1793 .instance_init = kvmppc_host_cpu_initfn,
1794 .class_init = kvmppc_host_cpu_class_init,
1796 uint32_t host_pvr = mfpvr();
1797 PowerPCCPUClass *pvr_pcc;
1799 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1800 if (pvr_pcc == NULL) {
1801 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1803 if (pvr_pcc == NULL) {
1804 return -1;
1806 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1807 type_register(&type_info);
1808 return 0;
1811 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1813 struct kvm_rtas_token_args args = {
1814 .token = token,
1817 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1818 return -ENOENT;
1821 strncpy(args.name, function, sizeof(args.name));
1823 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1826 int kvmppc_get_htab_fd(bool write)
1828 struct kvm_get_htab_fd s = {
1829 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1830 .start_index = 0,
1833 if (!cap_htab_fd) {
1834 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1835 return -1;
1838 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
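/* Stream hash table chunks from the kernel's htab fd into the migration
 * stream, stopping after roughly max_ns nanoseconds. Returns 1 once the
 * whole table has been read, 0 if there is more to do. */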
1841 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1843 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1844 uint8_t buf[bufsize];
1845 ssize_t rc;
1847 do {
1848 rc = read(fd, buf, bufsize);
1849 if (rc < 0) {
1850 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1851 strerror(errno));
1852 return rc;
1853 } else if (rc) {
1854 /* Kernel already returns data in BE format for the file */
1855 qemu_put_buffer(f, buf, rc);
1857 } while ((rc != 0)
1858 && ((max_ns < 0)
1859 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1861 return (rc == 0) ? 1 : 0;
1864 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1865 uint16_t n_valid, uint16_t n_invalid)
1867 struct kvm_get_htab_header *buf;
1868 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1869 ssize_t rc;
1871 buf = alloca(chunksize);
1872 /* This is KVM on ppc, so this is all big-endian */
1873 buf->index = index;
1874 buf->n_valid = n_valid;
1875 buf->n_invalid = n_invalid;
1877 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1879 rc = write(fd, buf, chunksize);
1880 if (rc < 0) {
1881 fprintf(stderr, "Error writing KVM hash table: %s\n",
1882 strerror(errno));
1883 return rc;
1885 if (rc != chunksize) {
1886 /* We should never get a short write on a single chunk */
1887 fprintf(stderr, "Short write, restoring KVM hash table\n");
1888 return -1;
1890 return 0;
1893 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1895 return true;
1898 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1900 return 1;
1903 int kvm_arch_on_sigbus(int code, void *addr)
1905 return 1;
1908 void kvm_arch_init_irq_routing(KVMState *s)
1912 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1914 return -EINVAL;
1917 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1919 return -EINVAL;
1922 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1924 return -EINVAL;
1927 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1929 return -EINVAL;
1932 void kvm_arch_remove_all_hw_breakpoints(void)
1936 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)