target-ppc: Update ppc_hash64_store_hpte to support updating in-kernel htab
[qemu-kvm.git] / target-ppc / kvm.c
blobb5fff70f09b6898fb20e05f38b07b69852269219
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
39 #include "trace.h"
41 //#define DEBUG_KVM
43 #ifdef DEBUG_KVM
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #else
47 #define DPRINTF(fmt, ...) \
48 do { } while (0)
49 #endif
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
54 KVM_CAP_LAST_INFO
/*
 * Cached kvm_check_extension() results, filled in by kvm_arch_init().
 * cap_papr is the exception: kvm_arch_init() leaves it alone because that
 * capability is only activated later by kvmppc_set_papr().
 * Being file-scope statics, all of them start out as zero.
 */
static int cap_interrupt_unset;
static int cap_interrupt_level;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can assure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
80 static QEMUTimer *idle_timer;
82 static void kvm_kick_cpu(void *opaque)
84 PowerPCCPU *cpu = opaque;
86 qemu_cpu_kick(CPU(cpu));
89 static int kvm_ppc_register_host_cpu_type(void);
91 int kvm_arch_init(KVMState *s)
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
108 if (!cap_interrupt_level) {
109 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
115 return 0;
118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
120 CPUPPCState *cenv = &cpu->env;
121 CPUState *cs = CPU(cpu);
122 struct kvm_sregs sregs;
123 int ret;
125 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users that they can run
129 BookE guests on BookS. Let's hope nobody dares enough :) */
130 return 0;
131 } else {
132 if (!cap_segstate) {
133 fprintf(stderr, "kvm error: missing PVR setting capability\n");
134 return -ENOSYS;
138 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
139 if (ret) {
140 return ret;
143 sregs.pvr = cenv->spr[SPR_PVR];
144 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
147 /* Set up a shared TLB array with KVM */
148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
150 CPUPPCState *env = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_book3e_206_tlb_params params = {};
153 struct kvm_config_tlb cfg = {};
154 struct kvm_enable_cap encap = {};
155 unsigned int entries = 0;
156 int ret, i;
158 if (!kvm_enabled() ||
159 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
160 return 0;
163 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
165 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
166 params.tlb_sizes[i] = booke206_tlb_size(env, i);
167 params.tlb_ways[i] = booke206_tlb_ways(env, i);
168 entries += params.tlb_sizes[i];
171 assert(entries == env->nb_tlb);
172 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
174 env->tlb_dirty = true;
176 cfg.array = (uintptr_t)env->tlb.tlbm;
177 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
178 cfg.params = (uintptr_t)&params;
179 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
181 encap.cap = KVM_CAP_SW_TLB;
182 encap.args[0] = (uintptr_t)&cfg;
184 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
185 if (ret < 0) {
186 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
187 __func__, strerror(-ret));
188 return ret;
191 env->kvm_sw_tlb = true;
192 return 0;
196 #if defined(TARGET_PPC64)
197 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
198 struct kvm_ppc_smmu_info *info)
200 CPUPPCState *env = &cpu->env;
201 CPUState *cs = CPU(cpu);
203 memset(info, 0, sizeof(*info));
205 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
206 * need to "guess" what the supported page sizes are.
208 * For that to work we make a few assumptions:
210 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
211 * KVM which only supports 4K and 16M pages, but supports them
212 * regardless of the backing store characteritics. We also don't
213 * support 1T segments.
215 * This is safe as if HV KVM ever supports that capability or PR
216 * KVM grows supports for more page/segment sizes, those versions
217 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
218 * will not hit this fallback
220 * - Else we are running HV KVM. This means we only support page
221 * sizes that fit in the backing store. Additionally we only
222 * advertize 64K pages if the processor is ARCH 2.06 and we assume
223 * P7 encodings for the SLB and hash table. Here too, we assume
224 * support for any newer processor will mean a kernel that
225 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
226 * this fallback.
228 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
229 /* No flags */
230 info->flags = 0;
231 info->slb_size = 64;
233 /* Standard 4k base page size segment */
234 info->sps[0].page_shift = 12;
235 info->sps[0].slb_enc = 0;
236 info->sps[0].enc[0].page_shift = 12;
237 info->sps[0].enc[0].pte_enc = 0;
239 /* Standard 16M large page size segment */
240 info->sps[1].page_shift = 24;
241 info->sps[1].slb_enc = SLB_VSID_L;
242 info->sps[1].enc[0].page_shift = 24;
243 info->sps[1].enc[0].pte_enc = 0;
244 } else {
245 int i = 0;
247 /* HV KVM has backing store size restrictions */
248 info->flags = KVM_PPC_PAGE_SIZES_REAL;
250 if (env->mmu_model & POWERPC_MMU_1TSEG) {
251 info->flags |= KVM_PPC_1T_SEGMENTS;
254 if (env->mmu_model == POWERPC_MMU_2_06) {
255 info->slb_size = 32;
256 } else {
257 info->slb_size = 64;
260 /* Standard 4k base page size segment */
261 info->sps[i].page_shift = 12;
262 info->sps[i].slb_enc = 0;
263 info->sps[i].enc[0].page_shift = 12;
264 info->sps[i].enc[0].pte_enc = 0;
265 i++;
267 /* 64K on MMU 2.06 */
268 if (env->mmu_model == POWERPC_MMU_2_06) {
269 info->sps[i].page_shift = 16;
270 info->sps[i].slb_enc = 0x110;
271 info->sps[i].enc[0].page_shift = 16;
272 info->sps[i].enc[0].pte_enc = 1;
273 i++;
276 /* Standard 16M large page size segment */
277 info->sps[i].page_shift = 24;
278 info->sps[i].slb_enc = SLB_VSID_L;
279 info->sps[i].enc[0].page_shift = 24;
280 info->sps[i].enc[0].pte_enc = 0;
284 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
286 CPUState *cs = CPU(cpu);
287 int ret;
289 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
290 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
291 if (ret == 0) {
292 return;
296 kvm_get_fallback_smmu_info(cpu, info);
299 static long getrampagesize(void)
301 struct statfs fs;
302 int ret;
304 if (!mem_path) {
305 /* guest RAM is backed by normal anonymous pages */
306 return getpagesize();
309 do {
310 ret = statfs(mem_path, &fs);
311 } while (ret != 0 && errno == EINTR);
313 if (ret != 0) {
314 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
315 strerror(errno));
316 exit(1);
319 #define HUGETLBFS_MAGIC 0x958458f6
321 if (fs.f_type != HUGETLBFS_MAGIC) {
322 /* Explicit mempath, but it's ordinary pages */
323 return getpagesize();
326 /* It's hugepage, return the huge page size */
327 return fs.f_bsize;
/*
 * Decide whether a page of 2^@shift bytes may be offered to the guest.
 *
 * @flags:     kvm_ppc_smmu_info flags; without KVM_PPC_PAGE_SIZES_REAL
 *             every size is accepted.
 * @rampgsize: size in bytes of the pages backing guest RAM.
 * @shift:     log2 of the candidate page size.
 *
 * With KVM_PPC_PAGE_SIZES_REAL set the page must not be larger than the
 * backing page.  The comparison is done in 64 bits with a guarded shift
 * count, so that large shifts (e.g. 16G pages on a host with a 32-bit
 * long) cannot hit an undefined shift and are simply rejected.
 */
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    /* 2^63 and above can never fit in a (positive) long. */
    if (rampgsize <= 0 || shift >= 63) {
        return false;
    }

    return (UINT64_C(1) << shift) <= (uint64_t)rampgsize;
}
339 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
341 static struct kvm_ppc_smmu_info smmu_info;
342 static bool has_smmu_info;
343 CPUPPCState *env = &cpu->env;
344 long rampagesize;
345 int iq, ik, jq, jk;
347 /* We only handle page sizes for 64-bit server guests for now */
348 if (!(env->mmu_model & POWERPC_MMU_64)) {
349 return;
352 /* Collect MMU info from kernel if not already */
353 if (!has_smmu_info) {
354 kvm_get_smmu_info(cpu, &smmu_info);
355 has_smmu_info = true;
358 rampagesize = getrampagesize();
360 /* Convert to QEMU form */
361 memset(&env->sps, 0, sizeof(env->sps));
363 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
364 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
365 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
367 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
368 ksps->page_shift)) {
369 continue;
371 qsps->page_shift = ksps->page_shift;
372 qsps->slb_enc = ksps->slb_enc;
373 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
374 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
375 ksps->enc[jk].page_shift)) {
376 continue;
378 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
379 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
380 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
381 break;
384 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
385 break;
388 env->slb_nr = smmu_info.slb_size;
389 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
390 env->mmu_model |= POWERPC_MMU_1TSEG;
391 } else {
392 env->mmu_model &= ~POWERPC_MMU_1TSEG;
395 #else /* defined (TARGET_PPC64) */
397 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
401 #endif /* !defined (TARGET_PPC64) */
403 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
405 return cpu->cpu_index;
408 int kvm_arch_init_vcpu(CPUState *cs)
410 PowerPCCPU *cpu = POWERPC_CPU(cs);
411 CPUPPCState *cenv = &cpu->env;
412 int ret;
414 /* Gather server mmu info from KVM and update the CPU state */
415 kvm_fixup_page_sizes(cpu);
417 /* Synchronize sregs with kvm */
418 ret = kvm_arch_sync_sregs(cpu);
419 if (ret) {
420 return ret;
423 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
425 /* Some targets support access to KVM's guest TLB. */
426 switch (cenv->mmu_model) {
427 case POWERPC_MMU_BOOKE206:
428 ret = kvm_booke206_tlb_init(cpu);
429 break;
430 default:
431 break;
434 return ret;
437 void kvm_arch_reset_vcpu(CPUState *cpu)
441 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
443 CPUPPCState *env = &cpu->env;
444 CPUState *cs = CPU(cpu);
445 struct kvm_dirty_tlb dirty_tlb;
446 unsigned char *bitmap;
447 int ret;
449 if (!env->kvm_sw_tlb) {
450 return;
453 bitmap = g_malloc((env->nb_tlb + 7) / 8);
454 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
456 dirty_tlb.bitmap = (uintptr_t)bitmap;
457 dirty_tlb.num_dirty = env->nb_tlb;
459 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
460 if (ret) {
461 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
462 __func__, strerror(-ret));
465 g_free(bitmap);
468 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
470 PowerPCCPU *cpu = POWERPC_CPU(cs);
471 CPUPPCState *env = &cpu->env;
472 union {
473 uint32_t u32;
474 uint64_t u64;
475 } val;
476 struct kvm_one_reg reg = {
477 .id = id,
478 .addr = (uintptr_t) &val,
480 int ret;
482 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
483 if (ret != 0) {
484 trace_kvm_failed_spr_get(spr, strerror(errno));
485 } else {
486 switch (id & KVM_REG_SIZE_MASK) {
487 case KVM_REG_SIZE_U32:
488 env->spr[spr] = val.u32;
489 break;
491 case KVM_REG_SIZE_U64:
492 env->spr[spr] = val.u64;
493 break;
495 default:
496 /* Don't handle this size yet */
497 abort();
502 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
504 PowerPCCPU *cpu = POWERPC_CPU(cs);
505 CPUPPCState *env = &cpu->env;
506 union {
507 uint32_t u32;
508 uint64_t u64;
509 } val;
510 struct kvm_one_reg reg = {
511 .id = id,
512 .addr = (uintptr_t) &val,
514 int ret;
516 switch (id & KVM_REG_SIZE_MASK) {
517 case KVM_REG_SIZE_U32:
518 val.u32 = env->spr[spr];
519 break;
521 case KVM_REG_SIZE_U64:
522 val.u64 = env->spr[spr];
523 break;
525 default:
526 /* Don't handle this size yet */
527 abort();
530 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
531 if (ret != 0) {
532 trace_kvm_failed_spr_set(spr, strerror(errno));
536 static int kvm_put_fp(CPUState *cs)
538 PowerPCCPU *cpu = POWERPC_CPU(cs);
539 CPUPPCState *env = &cpu->env;
540 struct kvm_one_reg reg;
541 int i;
542 int ret;
544 if (env->insns_flags & PPC_FLOAT) {
545 uint64_t fpscr = env->fpscr;
546 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
548 reg.id = KVM_REG_PPC_FPSCR;
549 reg.addr = (uintptr_t)&fpscr;
550 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
551 if (ret < 0) {
552 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
553 return ret;
556 for (i = 0; i < 32; i++) {
557 uint64_t vsr[2];
559 vsr[0] = float64_val(env->fpr[i]);
560 vsr[1] = env->vsr[i];
561 reg.addr = (uintptr_t) &vsr;
562 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
564 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
565 if (ret < 0) {
566 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
567 i, strerror(errno));
568 return ret;
573 if (env->insns_flags & PPC_ALTIVEC) {
574 reg.id = KVM_REG_PPC_VSCR;
575 reg.addr = (uintptr_t)&env->vscr;
576 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
577 if (ret < 0) {
578 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
579 return ret;
582 for (i = 0; i < 32; i++) {
583 reg.id = KVM_REG_PPC_VR(i);
584 reg.addr = (uintptr_t)&env->avr[i];
585 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
586 if (ret < 0) {
587 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
588 return ret;
593 return 0;
596 static int kvm_get_fp(CPUState *cs)
598 PowerPCCPU *cpu = POWERPC_CPU(cs);
599 CPUPPCState *env = &cpu->env;
600 struct kvm_one_reg reg;
601 int i;
602 int ret;
604 if (env->insns_flags & PPC_FLOAT) {
605 uint64_t fpscr;
606 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
608 reg.id = KVM_REG_PPC_FPSCR;
609 reg.addr = (uintptr_t)&fpscr;
610 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
611 if (ret < 0) {
612 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
613 return ret;
614 } else {
615 env->fpscr = fpscr;
618 for (i = 0; i < 32; i++) {
619 uint64_t vsr[2];
621 reg.addr = (uintptr_t) &vsr;
622 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
624 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
625 if (ret < 0) {
626 DPRINTF("Unable to get %s%d from KVM: %s\n",
627 vsx ? "VSR" : "FPR", i, strerror(errno));
628 return ret;
629 } else {
630 env->fpr[i] = vsr[0];
631 if (vsx) {
632 env->vsr[i] = vsr[1];
638 if (env->insns_flags & PPC_ALTIVEC) {
639 reg.id = KVM_REG_PPC_VSCR;
640 reg.addr = (uintptr_t)&env->vscr;
641 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
642 if (ret < 0) {
643 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
644 return ret;
647 for (i = 0; i < 32; i++) {
648 reg.id = KVM_REG_PPC_VR(i);
649 reg.addr = (uintptr_t)&env->avr[i];
650 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
651 if (ret < 0) {
652 DPRINTF("Unable to get VR%d from KVM: %s\n",
653 i, strerror(errno));
654 return ret;
659 return 0;
662 #if defined(TARGET_PPC64)
663 static int kvm_get_vpa(CPUState *cs)
665 PowerPCCPU *cpu = POWERPC_CPU(cs);
666 CPUPPCState *env = &cpu->env;
667 struct kvm_one_reg reg;
668 int ret;
670 reg.id = KVM_REG_PPC_VPA_ADDR;
671 reg.addr = (uintptr_t)&env->vpa_addr;
672 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
673 if (ret < 0) {
674 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
675 return ret;
678 assert((uintptr_t)&env->slb_shadow_size
679 == ((uintptr_t)&env->slb_shadow_addr + 8));
680 reg.id = KVM_REG_PPC_VPA_SLB;
681 reg.addr = (uintptr_t)&env->slb_shadow_addr;
682 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
683 if (ret < 0) {
684 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
685 strerror(errno));
686 return ret;
689 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
690 reg.id = KVM_REG_PPC_VPA_DTL;
691 reg.addr = (uintptr_t)&env->dtl_addr;
692 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
693 if (ret < 0) {
694 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
695 strerror(errno));
696 return ret;
699 return 0;
702 static int kvm_put_vpa(CPUState *cs)
704 PowerPCCPU *cpu = POWERPC_CPU(cs);
705 CPUPPCState *env = &cpu->env;
706 struct kvm_one_reg reg;
707 int ret;
709 /* SLB shadow or DTL can't be registered unless a master VPA is
710 * registered. That means when restoring state, if a VPA *is*
711 * registered, we need to set that up first. If not, we need to
712 * deregister the others before deregistering the master VPA */
713 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
715 if (env->vpa_addr) {
716 reg.id = KVM_REG_PPC_VPA_ADDR;
717 reg.addr = (uintptr_t)&env->vpa_addr;
718 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
719 if (ret < 0) {
720 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
721 return ret;
725 assert((uintptr_t)&env->slb_shadow_size
726 == ((uintptr_t)&env->slb_shadow_addr + 8));
727 reg.id = KVM_REG_PPC_VPA_SLB;
728 reg.addr = (uintptr_t)&env->slb_shadow_addr;
729 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
730 if (ret < 0) {
731 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
732 return ret;
735 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
736 reg.id = KVM_REG_PPC_VPA_DTL;
737 reg.addr = (uintptr_t)&env->dtl_addr;
738 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
739 if (ret < 0) {
740 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
741 strerror(errno));
742 return ret;
745 if (!env->vpa_addr) {
746 reg.id = KVM_REG_PPC_VPA_ADDR;
747 reg.addr = (uintptr_t)&env->vpa_addr;
748 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
749 if (ret < 0) {
750 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
751 return ret;
755 return 0;
757 #endif /* TARGET_PPC64 */
759 int kvm_arch_put_registers(CPUState *cs, int level)
761 PowerPCCPU *cpu = POWERPC_CPU(cs);
762 CPUPPCState *env = &cpu->env;
763 struct kvm_regs regs;
764 int ret;
765 int i;
767 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
768 if (ret < 0) {
769 return ret;
772 regs.ctr = env->ctr;
773 regs.lr = env->lr;
774 regs.xer = cpu_read_xer(env);
775 regs.msr = env->msr;
776 regs.pc = env->nip;
778 regs.srr0 = env->spr[SPR_SRR0];
779 regs.srr1 = env->spr[SPR_SRR1];
781 regs.sprg0 = env->spr[SPR_SPRG0];
782 regs.sprg1 = env->spr[SPR_SPRG1];
783 regs.sprg2 = env->spr[SPR_SPRG2];
784 regs.sprg3 = env->spr[SPR_SPRG3];
785 regs.sprg4 = env->spr[SPR_SPRG4];
786 regs.sprg5 = env->spr[SPR_SPRG5];
787 regs.sprg6 = env->spr[SPR_SPRG6];
788 regs.sprg7 = env->spr[SPR_SPRG7];
790 regs.pid = env->spr[SPR_BOOKE_PID];
792 for (i = 0;i < 32; i++)
793 regs.gpr[i] = env->gpr[i];
795 regs.cr = 0;
796 for (i = 0; i < 8; i++) {
797 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
800 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
801 if (ret < 0)
802 return ret;
804 kvm_put_fp(cs);
806 if (env->tlb_dirty) {
807 kvm_sw_tlb_put(cpu);
808 env->tlb_dirty = false;
811 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
812 struct kvm_sregs sregs;
814 sregs.pvr = env->spr[SPR_PVR];
816 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
818 /* Sync SLB */
819 #ifdef TARGET_PPC64
820 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
821 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
822 if (env->slb[i].esid & SLB_ESID_V) {
823 sregs.u.s.ppc64.slb[i].slbe |= i;
825 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
827 #endif
829 /* Sync SRs */
830 for (i = 0; i < 16; i++) {
831 sregs.u.s.ppc32.sr[i] = env->sr[i];
834 /* Sync BATs */
835 for (i = 0; i < 8; i++) {
836 /* Beware. We have to swap upper and lower bits here */
837 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
838 | env->DBAT[1][i];
839 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
840 | env->IBAT[1][i];
843 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
844 if (ret) {
845 return ret;
849 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
850 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
853 if (cap_one_reg) {
854 int i;
856 /* We deliberately ignore errors here, for kernels which have
857 * the ONE_REG calls, but don't support the specific
858 * registers, there's a reasonable chance things will still
859 * work, at least until we try to migrate. */
860 for (i = 0; i < 1024; i++) {
861 uint64_t id = env->spr_cb[i].one_reg_id;
863 if (id != 0) {
864 kvm_put_one_spr(cs, id, i);
868 #ifdef TARGET_PPC64
869 if (cap_papr) {
870 if (kvm_put_vpa(cs) < 0) {
871 DPRINTF("Warning: Unable to set VPA information to KVM\n");
874 #endif /* TARGET_PPC64 */
877 return ret;
880 int kvm_arch_get_registers(CPUState *cs)
882 PowerPCCPU *cpu = POWERPC_CPU(cs);
883 CPUPPCState *env = &cpu->env;
884 struct kvm_regs regs;
885 struct kvm_sregs sregs;
886 uint32_t cr;
887 int i, ret;
889 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
890 if (ret < 0)
891 return ret;
893 cr = regs.cr;
894 for (i = 7; i >= 0; i--) {
895 env->crf[i] = cr & 15;
896 cr >>= 4;
899 env->ctr = regs.ctr;
900 env->lr = regs.lr;
901 cpu_write_xer(env, regs.xer);
902 env->msr = regs.msr;
903 env->nip = regs.pc;
905 env->spr[SPR_SRR0] = regs.srr0;
906 env->spr[SPR_SRR1] = regs.srr1;
908 env->spr[SPR_SPRG0] = regs.sprg0;
909 env->spr[SPR_SPRG1] = regs.sprg1;
910 env->spr[SPR_SPRG2] = regs.sprg2;
911 env->spr[SPR_SPRG3] = regs.sprg3;
912 env->spr[SPR_SPRG4] = regs.sprg4;
913 env->spr[SPR_SPRG5] = regs.sprg5;
914 env->spr[SPR_SPRG6] = regs.sprg6;
915 env->spr[SPR_SPRG7] = regs.sprg7;
917 env->spr[SPR_BOOKE_PID] = regs.pid;
919 for (i = 0;i < 32; i++)
920 env->gpr[i] = regs.gpr[i];
922 kvm_get_fp(cs);
924 if (cap_booke_sregs) {
925 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
926 if (ret < 0) {
927 return ret;
930 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
931 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
932 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
933 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
934 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
935 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
936 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
937 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
938 env->spr[SPR_DECR] = sregs.u.e.dec;
939 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
940 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
941 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
944 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
945 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
946 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
947 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
948 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
949 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
952 if (sregs.u.e.features & KVM_SREGS_E_64) {
953 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
956 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
957 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
960 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
961 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
962 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
963 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
964 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
965 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
966 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
967 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
968 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
969 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
970 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
971 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
972 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
973 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
974 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
975 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
976 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
978 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
979 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
980 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
981 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
984 if (sregs.u.e.features & KVM_SREGS_E_PM) {
985 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
988 if (sregs.u.e.features & KVM_SREGS_E_PC) {
989 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
990 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
994 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
995 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
996 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
997 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
998 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
999 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1000 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1001 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1002 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1003 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1004 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1007 if (sregs.u.e.features & KVM_SREGS_EXP) {
1008 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1011 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1012 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1013 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1016 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1017 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1018 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1019 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1021 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1022 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1023 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1028 if (cap_segstate) {
1029 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1030 if (ret < 0) {
1031 return ret;
1034 if (!env->external_htab) {
1035 ppc_store_sdr1(env, sregs.u.s.sdr1);
1038 /* Sync SLB */
1039 #ifdef TARGET_PPC64
1041 * The packed SLB array we get from KVM_GET_SREGS only contains
1042 * information about valid entries. So we flush our internal
1043 * copy to get rid of stale ones, then put all valid SLB entries
1044 * back in.
1046 memset(env->slb, 0, sizeof(env->slb));
1047 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1048 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1049 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1051 * Only restore valid entries
1053 if (rb & SLB_ESID_V) {
1054 ppc_store_slb(env, rb, rs);
1057 #endif
1059 /* Sync SRs */
1060 for (i = 0; i < 16; i++) {
1061 env->sr[i] = sregs.u.s.ppc32.sr[i];
1064 /* Sync BATs */
1065 for (i = 0; i < 8; i++) {
1066 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1067 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1068 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1069 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1073 if (cap_hior) {
1074 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1077 if (cap_one_reg) {
1078 int i;
1080 /* We deliberately ignore errors here, for kernels which have
1081 * the ONE_REG calls, but don't support the specific
1082 * registers, there's a reasonable chance things will still
1083 * work, at least until we try to migrate. */
1084 for (i = 0; i < 1024; i++) {
1085 uint64_t id = env->spr_cb[i].one_reg_id;
1087 if (id != 0) {
1088 kvm_get_one_spr(cs, id, i);
1092 #ifdef TARGET_PPC64
1093 if (cap_papr) {
1094 if (kvm_get_vpa(cs) < 0) {
1095 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1098 #endif
1101 return 0;
1104 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1106 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1108 if (irq != PPC_INTERRUPT_EXT) {
1109 return 0;
1112 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1113 return 0;
1116 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1118 return 0;
1121 #if defined(TARGET_PPCEMB)
1122 #define PPC_INPUT_INT PPC40x_INPUT_INT
1123 #elif defined(TARGET_PPC64)
1124 #define PPC_INPUT_INT PPC970_INPUT_INT
1125 #else
1126 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1127 #endif
1129 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1131 PowerPCCPU *cpu = POWERPC_CPU(cs);
1132 CPUPPCState *env = &cpu->env;
1133 int r;
1134 unsigned irq;
1136 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1137 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1138 if (!cap_interrupt_level &&
1139 run->ready_for_interrupt_injection &&
1140 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1141 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1143 /* For now KVM disregards the 'irq' argument. However, in the
1144 * future KVM could cache it in-kernel to avoid a heavyweight exit
1145 * when reading the UIC.
1147 irq = KVM_INTERRUPT_SET;
1149 DPRINTF("injected interrupt %d\n", irq);
1150 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1151 if (r < 0) {
1152 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1155 /* Always wake up soon in case the interrupt was level based */
1156 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1157 (get_ticks_per_sec() / 50));
1160 /* We don't know if there are more interrupts pending after this. However,
1161 * the guest will return to userspace in the course of handling this one
1162 * anyways, so we will get a chance to deliver the rest. */
1165 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1169 int kvm_arch_process_async_events(CPUState *cs)
1171 return cs->halted;
1174 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1176 CPUState *cs = CPU(cpu);
1177 CPUPPCState *env = &cpu->env;
1179 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1180 cs->halted = 1;
1181 env->exception_index = EXCP_HLT;
1184 return 0;
1187 /* map dcr access to existing qemu dcr emulation */
1188 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1190 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1191 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1193 return 0;
1196 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1198 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1199 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1201 return 0;
1204 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1206 PowerPCCPU *cpu = POWERPC_CPU(cs);
1207 CPUPPCState *env = &cpu->env;
1208 int ret;
1210 switch (run->exit_reason) {
1211 case KVM_EXIT_DCR:
1212 if (run->dcr.is_write) {
1213 DPRINTF("handle dcr write\n");
1214 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1215 } else {
1216 DPRINTF("handle dcr read\n");
1217 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1219 break;
1220 case KVM_EXIT_HLT:
1221 DPRINTF("handle halt\n");
1222 ret = kvmppc_handle_halt(cpu);
1223 break;
1224 #if defined(TARGET_PPC64)
1225 case KVM_EXIT_PAPR_HCALL:
1226 DPRINTF("handle PAPR hypercall\n");
1227 run->papr_hcall.ret = spapr_hypercall(cpu,
1228 run->papr_hcall.nr,
1229 run->papr_hcall.args);
1230 ret = 0;
1231 break;
1232 #endif
1233 case KVM_EXIT_EPR:
1234 DPRINTF("handle epr\n");
1235 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1236 ret = 0;
1237 break;
1238 case KVM_EXIT_WATCHDOG:
1239 DPRINTF("handle watchdog expiry\n");
1240 watchdog_perform_action();
1241 ret = 0;
1242 break;
1244 default:
1245 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1246 ret = -1;
1247 break;
1250 return ret;
1253 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1255 CPUState *cs = CPU(cpu);
1256 uint32_t bits = tsr_bits;
1257 struct kvm_one_reg reg = {
1258 .id = KVM_REG_PPC_OR_TSR,
1259 .addr = (uintptr_t) &bits,
1262 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1265 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1268 CPUState *cs = CPU(cpu);
1269 uint32_t bits = tsr_bits;
1270 struct kvm_one_reg reg = {
1271 .id = KVM_REG_PPC_CLEAR_TSR,
1272 .addr = (uintptr_t) &bits,
1275 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1278 int kvmppc_set_tcr(PowerPCCPU *cpu)
1280 CPUState *cs = CPU(cpu);
1281 CPUPPCState *env = &cpu->env;
1282 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1284 struct kvm_one_reg reg = {
1285 .id = KVM_REG_PPC_TCR,
1286 .addr = (uintptr_t) &tcr,
1289 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1292 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1294 CPUState *cs = CPU(cpu);
1295 struct kvm_enable_cap encap = {};
1296 int ret;
1298 if (!kvm_enabled()) {
1299 return -1;
1302 if (!cap_ppc_watchdog) {
1303 printf("warning: KVM does not support watchdog");
1304 return -1;
1307 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1308 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1309 if (ret < 0) {
1310 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1311 __func__, strerror(-ret));
1312 return ret;
1315 return ret;
/*
 * Find the first line of /proc/cpuinfo that starts with 'field' and copy
 * that whole line into 'value' (at most 'len' bytes, via pstrcpy).
 *
 * Returns 0 when a matching line was found, -1 otherwise (including when
 * the file cannot be opened).
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int found = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (!fp) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* Stop on a line whose first byte is NUL. */
        if (!*line) {
            break;
        }
    }

    fclose(fp);

    return found;
}
/*
 * Return the number parsed from the "timebase" line of /proc/cpuinfo.
 * Falls back to get_ticks_per_sec() when the line is missing or has no
 * ':' separator.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line)) != 0) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (colon == NULL) {
        return fallback;
    }

    /* The value follows the colon. */
    return atoi(colon + 1);
}
1366 /* Try to find a device tree node for a CPU with clock-frequency property */
1367 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1369 struct dirent *dirp;
1370 DIR *dp;
1372 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1373 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1374 return -1;
1377 buf[0] = '\0';
1378 while ((dirp = readdir(dp)) != NULL) {
1379 FILE *f;
1380 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1381 dirp->d_name);
1382 f = fopen(buf, "r");
1383 if (f) {
1384 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1385 fclose(f);
1386 break;
1388 buf[0] = '\0';
1390 closedir(dp);
1391 if (buf[0] == '\0') {
1392 printf("Unknown host!\n");
1393 return -1;
1396 return 0;
1399 /* Read a CPU node property from the host device tree that's a single
1400 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1401 * (can't find or open the property, or doesn't understand the
1402 * format) */
1403 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1405 char buf[PATH_MAX];
1406 union {
1407 uint32_t v32;
1408 uint64_t v64;
1409 } u;
1410 FILE *f;
1411 int len;
1413 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1414 return -1;
1417 strncat(buf, "/", sizeof(buf) - strlen(buf));
1418 strncat(buf, propname, sizeof(buf) - strlen(buf));
1420 f = fopen(buf, "rb");
1421 if (!f) {
1422 return -1;
1425 len = fread(&u, 1, sizeof(u), f);
1426 fclose(f);
1427 switch (len) {
1428 case 4:
1429 /* property is a 32-bit quantity */
1430 return be32_to_cpu(u.v32);
1431 case 8:
1432 return be64_to_cpu(u.v64);
1435 return 0;
/* Return the host CPU node's "clock-frequency" device-tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/*
 * Return the host CPU node's "ibm,vmx" device-tree property, truncated
 * to 32 bits.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,vmx");

    return raw;
}
/*
 * Return the host CPU node's "ibm,dfp" device-tree property, truncated
 * to 32 bits.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,dfp");

    return raw;
}
1453 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1455 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1456 CPUState *cs = CPU(cpu);
1458 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1459 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1460 return 0;
1463 return 1;
1466 int kvmppc_get_hasidle(CPUPPCState *env)
1468 struct kvm_ppc_pvinfo pvinfo;
1470 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1471 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1472 return 1;
1475 return 0;
1478 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1480 uint32_t *hc = (uint32_t*)buf;
1481 struct kvm_ppc_pvinfo pvinfo;
1483 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1484 memcpy(buf, pvinfo.hcall, buf_len);
1485 return 0;
1489 * Fallback to always fail hypercalls:
1491 * li r3, -1
1492 * nop
1493 * nop
1494 * nop
1497 hc[0] = 0x3860ffff;
1498 hc[1] = 0x60000000;
1499 hc[2] = 0x60000000;
1500 hc[3] = 0x60000000;
1502 return 0;
1505 void kvmppc_set_papr(PowerPCCPU *cpu)
1507 CPUPPCState *env = &cpu->env;
1508 CPUState *cs = CPU(cpu);
1509 struct kvm_enable_cap cap = {};
1510 int ret;
1512 cap.cap = KVM_CAP_PPC_PAPR;
1513 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1515 if (ret) {
1516 cpu_abort(env, "This KVM version does not support PAPR\n");
1519 /* Update the capability flag so we sync the right information
1520 * with kvm */
1521 cap_papr = 1;
1524 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1526 CPUPPCState *env = &cpu->env;
1527 CPUState *cs = CPU(cpu);
1528 struct kvm_enable_cap cap = {};
1529 int ret;
1531 cap.cap = KVM_CAP_PPC_EPR;
1532 cap.args[0] = mpic_proxy;
1533 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1535 if (ret && mpic_proxy) {
1536 cpu_abort(env, "This KVM version does not support EPR\n");
1540 int kvmppc_smt_threads(void)
1542 return cap_ppc_smt ? cap_ppc_smt : 1;
1545 #ifdef TARGET_PPC64
1546 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1548 void *rma;
1549 off_t size;
1550 int fd;
1551 struct kvm_allocate_rma ret;
1552 MemoryRegion *rma_region;
1554 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1555 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1556 * not necessary on this hardware
1557 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1559 * FIXME: We should allow the user to force contiguous RMA
1560 * allocation in the cap_ppc_rma==1 case.
1562 if (cap_ppc_rma < 2) {
1563 return 0;
1566 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1567 if (fd < 0) {
1568 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1569 strerror(errno));
1570 return -1;
1573 size = MIN(ret.rma_size, 256ul << 20);
1575 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1576 if (rma == MAP_FAILED) {
1577 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1578 return -1;
1581 rma_region = g_new(MemoryRegion, 1);
1582 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1583 vmstate_register_ram_global(rma_region);
1584 memory_region_add_subregion(sysmem, 0, rma_region);
1586 return size;
1589 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1591 struct kvm_ppc_smmu_info info;
1592 long rampagesize, best_page_shift;
1593 int i;
1595 if (cap_ppc_rma >= 2) {
1596 return current_size;
1599 /* Find the largest hardware supported page size that's less than
1600 * or equal to the (logical) backing page size of guest RAM */
1601 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1602 rampagesize = getrampagesize();
1603 best_page_shift = 0;
1605 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1606 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1608 if (!sps->page_shift) {
1609 continue;
1612 if ((sps->page_shift > best_page_shift)
1613 && ((1UL << sps->page_shift) <= rampagesize)) {
1614 best_page_shift = sps->page_shift;
1618 return MIN(current_size,
1619 1ULL << (best_page_shift + hash_shift - 7));
1621 #endif
1623 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1625 struct kvm_create_spapr_tce args = {
1626 .liobn = liobn,
1627 .window_size = window_size,
1629 long len;
1630 int fd;
1631 void *table;
1633 /* Must set fd to -1 so we don't try to munmap when called for
1634 * destroying the table, which the upper layers -will- do
1636 *pfd = -1;
1637 if (!cap_spapr_tce) {
1638 return NULL;
1641 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1642 if (fd < 0) {
1643 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1644 liobn);
1645 return NULL;
1648 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1649 /* FIXME: round this up to page size */
1651 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1652 if (table == MAP_FAILED) {
1653 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1654 liobn);
1655 close(fd);
1656 return NULL;
1659 *pfd = fd;
1660 return table;
1663 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1665 long len;
1667 if (fd < 0) {
1668 return -1;
1671 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1672 if ((munmap(table, len) < 0) ||
1673 (close(fd) < 0)) {
1674 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1675 strerror(errno));
1676 /* Leak the table */
1679 return 0;
1682 int kvmppc_reset_htab(int shift_hint)
1684 uint32_t shift = shift_hint;
1686 if (!kvm_enabled()) {
1687 /* Full emulation, tell caller to allocate htab itself */
1688 return 0;
1690 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1691 int ret;
1692 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1693 if (ret == -ENOTTY) {
1694 /* At least some versions of PR KVM advertise the
1695 * capability, but don't implement the ioctl(). Oops.
1696 * Return 0 so that we allocate the htab in qemu, as is
1697 * correct for PR. */
1698 return 0;
1699 } else if (ret < 0) {
1700 return ret;
1702 return shift;
1705 /* We have a kernel that predates the htab reset calls. For PR
1706 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1707 * this era, it has allocated a 16MB fixed size hash table
1708 * already. Kernels of this era have the GET_PVINFO capability
1709 * only on PR, so we use this hack to determine the right
1710 * answer */
1711 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1712 /* PR - tell caller to allocate htab */
1713 return 0;
1714 } else {
1715 /* HV - assume 16MB kernel allocated htab */
1716 return 24;
1720 static inline uint32_t mfpvr(void)
1722 uint32_t pvr;
1724 asm ("mfpvr %0"
1725 : "=r"(pvr));
1726 return pvr;
/* Set (on == true) or clear (on == false) the 'flags' bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1738 static void kvmppc_host_cpu_initfn(Object *obj)
1740 assert(kvm_enabled());
1743 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1745 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1746 uint32_t vmx = kvmppc_get_vmx();
1747 uint32_t dfp = kvmppc_get_dfp();
1748 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1749 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1751 /* Now fix up the class with information we can query from the host */
1752 pcc->pvr = mfpvr();
1754 if (vmx != -1) {
1755 /* Only override when we know what the host supports */
1756 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1757 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1759 if (dfp != -1) {
1760 /* Only override when we know what the host supports */
1761 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1764 if (dcache_size != -1) {
1765 pcc->l1_dcache_size = dcache_size;
1768 if (icache_size != -1) {
1769 pcc->l1_icache_size = icache_size;
1773 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1775 CPUState *cs = CPU(cpu);
1776 int smt;
1778 /* Adjust cpu index for SMT */
1779 smt = kvmppc_smt_threads();
1780 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1781 + (cs->cpu_index % smp_threads);
1783 return 0;
1786 bool kvmppc_has_cap_epr(void)
1788 return cap_epr;
1791 bool kvmppc_has_cap_htab_fd(void)
1793 return cap_htab_fd;
1796 static int kvm_ppc_register_host_cpu_type(void)
1798 TypeInfo type_info = {
1799 .name = TYPE_HOST_POWERPC_CPU,
1800 .instance_init = kvmppc_host_cpu_initfn,
1801 .class_init = kvmppc_host_cpu_class_init,
1803 uint32_t host_pvr = mfpvr();
1804 PowerPCCPUClass *pvr_pcc;
1806 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1807 if (pvr_pcc == NULL) {
1808 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1810 if (pvr_pcc == NULL) {
1811 return -1;
1813 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1814 type_register(&type_info);
1815 return 0;
1818 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1820 struct kvm_rtas_token_args args = {
1821 .token = token,
1824 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1825 return -ENOENT;
1828 strncpy(args.name, function, sizeof(args.name));
1830 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1833 int kvmppc_get_htab_fd(bool write)
1835 struct kvm_get_htab_fd s = {
1836 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1837 .start_index = 0,
1840 if (!cap_htab_fd) {
1841 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1842 return -1;
1845 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1848 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1850 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1851 uint8_t buf[bufsize];
1852 ssize_t rc;
1854 do {
1855 rc = read(fd, buf, bufsize);
1856 if (rc < 0) {
1857 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1858 strerror(errno));
1859 return rc;
1860 } else if (rc) {
1861 /* Kernel already retuns data in BE format for the file */
1862 qemu_put_buffer(f, buf, rc);
1864 } while ((rc != 0)
1865 && ((max_ns < 0)
1866 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1868 return (rc == 0) ? 1 : 0;
1871 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1872 uint16_t n_valid, uint16_t n_invalid)
1874 struct kvm_get_htab_header *buf;
1875 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1876 ssize_t rc;
1878 buf = alloca(chunksize);
1879 /* This is KVM on ppc, so this is all big-endian */
1880 buf->index = index;
1881 buf->n_valid = n_valid;
1882 buf->n_invalid = n_invalid;
1884 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1886 rc = write(fd, buf, chunksize);
1887 if (rc < 0) {
1888 fprintf(stderr, "Error writing KVM hash table: %s\n",
1889 strerror(errno));
1890 return rc;
1892 if (rc != chunksize) {
1893 /* We should never get a short write on a single chunk */
1894 fprintf(stderr, "Short write, restoring KVM hash table\n");
1895 return -1;
1897 return 0;
1900 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1902 return true;
1905 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1907 return 1;
/* Global SIGBUS hook: performs no handling and always returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    const int rc = 1;

    return rc;
}
1915 void kvm_arch_init_irq_routing(KVMState *s)
1919 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1921 return -EINVAL;
1924 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1926 return -EINVAL;
1929 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1931 return -EINVAL;
1934 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1936 return -EINVAL;
/* Nothing to remove: the hardware breakpoint stubs never insert any. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
}
1943 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1947 struct kvm_get_htab_buf {
1948 struct kvm_get_htab_header header;
1950 * We require one extra byte for read
1952 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
1955 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1957 int htab_fd;
1958 struct kvm_get_htab_fd ghf;
1959 struct kvm_get_htab_buf *hpte_buf;
1961 ghf.flags = 0;
1962 ghf.start_index = pte_index;
1963 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1964 if (htab_fd < 0) {
1965 goto error_out;
1968 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1970 * Read the hpte group
1972 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1973 goto out_close;
1976 close(htab_fd);
1977 return (uint64_t)(uintptr_t) hpte_buf->hpte;
1979 out_close:
1980 g_free(hpte_buf);
1981 close(htab_fd);
1982 error_out:
1983 return 0;
1986 void kvmppc_hash64_free_pteg(uint64_t token)
1988 struct kvm_get_htab_buf *htab_buf;
1990 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1991 hpte);
1992 g_free(htab_buf);
1993 return;
1996 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1997 target_ulong pte0, target_ulong pte1)
1999 int htab_fd;
2000 struct kvm_get_htab_fd ghf;
2001 struct kvm_get_htab_buf hpte_buf;
2003 ghf.flags = 0;
2004 ghf.start_index = 0; /* Ignored */
2005 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2006 if (htab_fd < 0) {
2007 goto error_out;
2010 hpte_buf.header.n_valid = 1;
2011 hpte_buf.header.n_invalid = 0;
2012 hpte_buf.header.index = pte_index;
2013 hpte_buf.hpte[0] = pte0;
2014 hpte_buf.hpte[1] = pte1;
2016 * Write the hpte entry.
2017 * CAUTION: write() has the warn_unused_result attribute. Hence we
2018 * need to check the return value, even though we do nothing.
2020 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2021 goto out_close;
2024 out_close:
2025 close(htab_fd);
2026 return;
2028 error_out:
2029 return;