[qemu/ar7.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
40 //#define DEBUG_KVM
42 #ifdef DEBUG_KVM
43 #define DPRINTF(fmt, ...) \
44 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
45 #else
46 #define DPRINTF(fmt, ...) \
47 do { } while (0)
48 #endif
50 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
53 KVM_CAP_LAST_INFO
56 static int cap_interrupt_unset = false;
57 static int cap_interrupt_level = false;
58 static int cap_segstate;
59 static int cap_booke_sregs;
60 static int cap_ppc_smt;
61 static int cap_ppc_rma;
62 static int cap_spapr_tce;
63 static int cap_hior;
64 static int cap_one_reg;
65 static int cap_epr;
66 static int cap_ppc_watchdog;
67 static int cap_papr;
68 static int cap_htab_fd;
70 /* XXX We have a race condition where we actually have a level triggered
71 * interrupt, but the infrastructure can't expose that yet, so the guest
72 * takes but ignores it, goes to sleep and never gets notified that there's
73 * still an interrupt pending.
75 * As a quick workaround, let's just wake up again 20 ms after we injected
76 * an interrupt. That way we can ensure that we're always reinjecting
77 * interrupts in case the guest swallowed them.
79 static QEMUTimer *idle_timer;
81 static void kvm_kick_cpu(void *opaque)
83 PowerPCCPU *cpu = opaque;
85 qemu_cpu_kick(CPU(cpu));
88 static int kvm_ppc_register_host_cpu_type(void);
90 int kvm_arch_init(KVMState *s)
92 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
93 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
94 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
95 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
96 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
97 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
98 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
99 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
100 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
101 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
102 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
103 /* Note: we don't set cap_papr here, because this capability is
104 * only activated later, by kvmppc_set_papr() */
105 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
107 if (!cap_interrupt_level) {
108 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
109 "VM to stall at times!\n");
112 kvm_ppc_register_host_cpu_type();
114 return 0;
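/* Push the guest PVR to KVM via KVM_SET_SREGS (book3s only); BookE
 * guests simply keep the host PVR, so there is nothing to do. */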
117 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
119 CPUPPCState *cenv = &cpu->env;
120 CPUState *cs = CPU(cpu);
121 struct kvm_sregs sregs;
122 int ret;
124 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
125 /* What we're really trying to say is "if we're on BookE, we use
126 the native PVR for now". This is the only sane way to check
127 it though, so we potentially mislead users into thinking they can
128 run BookE guests on BookS. Let's hope nobody dares enough :) */
129 return 0;
130 } else {
131 if (!cap_segstate) {
132 fprintf(stderr, "kvm error: missing PVR setting capability\n");
133 return -ENOSYS;
137 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
138 if (ret) {
139 return ret;
142 sregs.pvr = cenv->spr[SPR_PVR];
143 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
146 /* Set up a shared TLB array with KVM */
147 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
149 CPUPPCState *env = &cpu->env;
150 CPUState *cs = CPU(cpu);
151 struct kvm_book3e_206_tlb_params params = {};
152 struct kvm_config_tlb cfg = {};
153 struct kvm_enable_cap encap = {};
154 unsigned int entries = 0;
155 int ret, i;
157 if (!kvm_enabled() ||
158 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
159 return 0;
162 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
164 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
165 params.tlb_sizes[i] = booke206_tlb_size(env, i);
166 params.tlb_ways[i] = booke206_tlb_ways(env, i);
167 entries += params.tlb_sizes[i];
170 assert(entries == env->nb_tlb);
171 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
173 env->tlb_dirty = true;
175 cfg.array = (uintptr_t)env->tlb.tlbm;
176 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
177 cfg.params = (uintptr_t)&params;
178 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
180 encap.cap = KVM_CAP_SW_TLB;
181 encap.args[0] = (uintptr_t)&cfg;
183 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
184 if (ret < 0) {
185 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
186 __func__, strerror(-ret));
187 return ret;
190 env->kvm_sw_tlb = true;
191 return 0;
195 #if defined(TARGET_PPC64)
196 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
197 struct kvm_ppc_smmu_info *info)
199 CPUPPCState *env = &cpu->env;
200 CPUState *cs = CPU(cpu);
202 memset(info, 0, sizeof(*info));
204 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
205 * need to "guess" what the supported page sizes are.
207 * For that to work we make a few assumptions:
209 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
210 * KVM which only supports 4K and 16M pages, but supports them
211 * regardless of the backing store characteristics. We also don't
212 * support 1T segments.
214 * This is safe because if HV KVM ever supports that capability, or
215 * PR KVM grows support for more page/segment sizes, those versions
216 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
217 * will not hit this fallback.
219 * - Else we are running HV KVM. This means we only support page
220 * sizes that fit in the backing store. Additionally we only
221 * advertise 64K pages if the processor is ARCH 2.06 and we assume
222 * P7 encodings for the SLB and hash table. Here too, we assume
223 * support for any newer processor will mean a kernel that
224 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
225 * this fallback.
227 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
228 /* No flags */
229 info->flags = 0;
230 info->slb_size = 64;
232 /* Standard 4k base page size segment */
233 info->sps[0].page_shift = 12;
234 info->sps[0].slb_enc = 0;
235 info->sps[0].enc[0].page_shift = 12;
236 info->sps[0].enc[0].pte_enc = 0;
238 /* Standard 16M large page size segment */
239 info->sps[1].page_shift = 24;
240 info->sps[1].slb_enc = SLB_VSID_L;
241 info->sps[1].enc[0].page_shift = 24;
242 info->sps[1].enc[0].pte_enc = 0;
243 } else {
244 int i = 0;
246 /* HV KVM has backing store size restrictions */
247 info->flags = KVM_PPC_PAGE_SIZES_REAL;
249 if (env->mmu_model & POWERPC_MMU_1TSEG) {
250 info->flags |= KVM_PPC_1T_SEGMENTS;
253 if (env->mmu_model == POWERPC_MMU_2_06) {
254 info->slb_size = 32;
255 } else {
256 info->slb_size = 64;
259 /* Standard 4k base page size segment */
260 info->sps[i].page_shift = 12;
261 info->sps[i].slb_enc = 0;
262 info->sps[i].enc[0].page_shift = 12;
263 info->sps[i].enc[0].pte_enc = 0;
264 i++;
266 /* 64K on MMU 2.06 */
267 if (env->mmu_model == POWERPC_MMU_2_06) {
268 info->sps[i].page_shift = 16;
269 info->sps[i].slb_enc = 0x110;
270 info->sps[i].enc[0].page_shift = 16;
271 info->sps[i].enc[0].pte_enc = 1;
272 i++;
275 /* Standard 16M large page size segment */
276 info->sps[i].page_shift = 24;
277 info->sps[i].slb_enc = SLB_VSID_L;
278 info->sps[i].enc[0].page_shift = 24;
279 info->sps[i].enc[0].pte_enc = 0;
283 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
285 CPUState *cs = CPU(cpu);
286 int ret;
288 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
289 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
290 if (ret == 0) {
291 return;
295 kvm_get_fallback_smmu_info(cpu, info);
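/* Page size backing guest RAM: the hugepage size when mem_path points
 * at a hugetlbfs mount, otherwise the normal host page size. */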
298 static long getrampagesize(void)
300 struct statfs fs;
301 int ret;
303 if (!mem_path) {
304 /* guest RAM is backed by normal anonymous pages */
305 return getpagesize();
308 do {
309 ret = statfs(mem_path, &fs);
310 } while (ret != 0 && errno == EINTR);
312 if (ret != 0) {
313 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
314 strerror(errno));
315 exit(1);
318 #define HUGETLBFS_MAGIC 0x958458f6
320 if (fs.f_type != HUGETLBFS_MAGIC) {
321 /* Explicit mempath, but it's ordinary pages */
322 return getpagesize();
325 /* It's hugetlbfs, so return the huge page size */
326 return fs.f_bsize;
329 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
331 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
332 return true;
335 return (1ul << shift) <= rampgsize;
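/* Restrict the page sizes we advertise to the guest to those that both
 * KVM and the RAM backing store can provide, and mirror the host's SLB
 * size and 1T segment support into the CPU state. */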
338 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
340 static struct kvm_ppc_smmu_info smmu_info;
341 static bool has_smmu_info;
342 CPUPPCState *env = &cpu->env;
343 long rampagesize;
344 int iq, ik, jq, jk;
346 /* We only handle page sizes for 64-bit server guests for now */
347 if (!(env->mmu_model & POWERPC_MMU_64)) {
348 return;
351 /* Collect MMU info from kernel if not already */
352 if (!has_smmu_info) {
353 kvm_get_smmu_info(cpu, &smmu_info);
354 has_smmu_info = true;
357 rampagesize = getrampagesize();
359 /* Convert to QEMU form */
360 memset(&env->sps, 0, sizeof(env->sps));
362 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
363 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
364 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
366 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
367 ksps->page_shift)) {
368 continue;
370 qsps->page_shift = ksps->page_shift;
371 qsps->slb_enc = ksps->slb_enc;
372 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
373 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
374 ksps->enc[jk].page_shift)) {
375 continue;
377 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
378 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
379 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
380 break;
383 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
384 break;
387 env->slb_nr = smmu_info.slb_size;
388 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
389 env->mmu_model |= POWERPC_MMU_1TSEG;
390 } else {
391 env->mmu_model &= ~POWERPC_MMU_1TSEG;
394 #else /* defined (TARGET_PPC64) */
396 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
400 #endif /* !defined (TARGET_PPC64) */
402 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
404 return cpu->cpu_index;
407 int kvm_arch_init_vcpu(CPUState *cs)
409 PowerPCCPU *cpu = POWERPC_CPU(cs);
410 CPUPPCState *cenv = &cpu->env;
411 int ret;
413 /* Gather server mmu info from KVM and update the CPU state */
414 kvm_fixup_page_sizes(cpu);
416 /* Synchronize sregs with kvm */
417 ret = kvm_arch_sync_sregs(cpu);
418 if (ret) {
419 return ret;
422 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
424 /* Some targets support access to KVM's guest TLB. */
425 switch (cenv->mmu_model) {
426 case POWERPC_MMU_BOOKE206:
427 ret = kvm_booke206_tlb_init(cpu);
428 break;
429 default:
430 break;
433 return ret;
436 void kvm_arch_reset_vcpu(CPUState *cpu)
440 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
442 CPUPPCState *env = &cpu->env;
443 CPUState *cs = CPU(cpu);
444 struct kvm_dirty_tlb dirty_tlb;
445 unsigned char *bitmap;
446 int ret;
448 if (!env->kvm_sw_tlb) {
449 return;
452 bitmap = g_malloc((env->nb_tlb + 7) / 8);
453 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
455 dirty_tlb.bitmap = (uintptr_t)bitmap;
456 dirty_tlb.num_dirty = env->nb_tlb;
458 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
459 if (ret) {
460 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
461 __func__, strerror(-ret));
464 g_free(bitmap);
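/* Read a single SPR from KVM through the ONE_REG interface into
 * env->spr[]; register sizes other than 32 or 64 bits abort. */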
467 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
469 PowerPCCPU *cpu = POWERPC_CPU(cs);
470 CPUPPCState *env = &cpu->env;
471 union {
472 uint32_t u32;
473 uint64_t u64;
474 } val;
475 struct kvm_one_reg reg = {
476 .id = id,
477 .addr = (uintptr_t) &val,
479 int ret;
481 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
482 if (ret != 0) {
483 fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
484 spr, strerror(errno));
485 } else {
486 switch (id & KVM_REG_SIZE_MASK) {
487 case KVM_REG_SIZE_U32:
488 env->spr[spr] = val.u32;
489 break;
491 case KVM_REG_SIZE_U64:
492 env->spr[spr] = val.u64;
493 break;
495 default:
496 /* Don't handle this size yet */
497 abort();
502 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
504 PowerPCCPU *cpu = POWERPC_CPU(cs);
505 CPUPPCState *env = &cpu->env;
506 union {
507 uint32_t u32;
508 uint64_t u64;
509 } val;
510 struct kvm_one_reg reg = {
511 .id = id,
512 .addr = (uintptr_t) &val,
514 int ret;
516 switch (id & KVM_REG_SIZE_MASK) {
517 case KVM_REG_SIZE_U32:
518 val.u32 = env->spr[spr];
519 break;
521 case KVM_REG_SIZE_U64:
522 val.u64 = env->spr[spr];
523 break;
525 default:
526 /* Don't handle this size yet */
527 abort();
530 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
531 if (ret != 0) {
532 fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
533 spr, strerror(errno));
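/* Transfer the FP/VSX and Altivec register state to KVM, one ONE_REG
 * ioctl per register. */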
537 static int kvm_put_fp(CPUState *cs)
539 PowerPCCPU *cpu = POWERPC_CPU(cs);
540 CPUPPCState *env = &cpu->env;
541 struct kvm_one_reg reg;
542 int i;
543 int ret;
545 if (env->insns_flags & PPC_FLOAT) {
546 uint64_t fpscr = env->fpscr;
547 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
549 reg.id = KVM_REG_PPC_FPSCR;
550 reg.addr = (uintptr_t)&fpscr;
551 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
552 if (ret < 0) {
553 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
554 return ret;
557 for (i = 0; i < 32; i++) {
558 uint64_t vsr[2];
560 vsr[0] = float64_val(env->fpr[i]);
561 vsr[1] = env->vsr[i];
562 reg.addr = (uintptr_t) &vsr;
563 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
565 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
566 if (ret < 0) {
567 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
568 i, strerror(errno));
569 return ret;
574 if (env->insns_flags & PPC_ALTIVEC) {
575 reg.id = KVM_REG_PPC_VSCR;
576 reg.addr = (uintptr_t)&env->vscr;
577 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 if (ret < 0) {
579 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
580 return ret;
583 for (i = 0; i < 32; i++) {
584 reg.id = KVM_REG_PPC_VR(i);
585 reg.addr = (uintptr_t)&env->avr[i];
586 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
587 if (ret < 0) {
588 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
589 return ret;
594 return 0;
597 static int kvm_get_fp(CPUState *cs)
599 PowerPCCPU *cpu = POWERPC_CPU(cs);
600 CPUPPCState *env = &cpu->env;
601 struct kvm_one_reg reg;
602 int i;
603 int ret;
605 if (env->insns_flags & PPC_FLOAT) {
606 uint64_t fpscr;
607 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
609 reg.id = KVM_REG_PPC_FPSCR;
610 reg.addr = (uintptr_t)&fpscr;
611 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
612 if (ret < 0) {
613 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
614 return ret;
615 } else {
616 env->fpscr = fpscr;
619 for (i = 0; i < 32; i++) {
620 uint64_t vsr[2];
622 reg.addr = (uintptr_t) &vsr;
623 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
625 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
626 if (ret < 0) {
627 DPRINTF("Unable to get %s%d from KVM: %s\n",
628 vsx ? "VSR" : "FPR", i, strerror(errno));
629 return ret;
630 } else {
631 env->fpr[i] = vsr[0];
632 if (vsx) {
633 env->vsr[i] = vsr[1];
639 if (env->insns_flags & PPC_ALTIVEC) {
640 reg.id = KVM_REG_PPC_VSCR;
641 reg.addr = (uintptr_t)&env->vscr;
642 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 if (ret < 0) {
644 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
645 return ret;
648 for (i = 0; i < 32; i++) {
649 reg.id = KVM_REG_PPC_VR(i);
650 reg.addr = (uintptr_t)&env->avr[i];
651 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
652 if (ret < 0) {
653 DPRINTF("Unable to get VR%d from KVM: %s\n",
654 i, strerror(errno));
655 return ret;
660 return 0;
663 #if defined(TARGET_PPC64)
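/* Read back the PAPR per-vCPU areas (VPA, SLB shadow buffer and
 * dispatch trace log) that were registered with the hypervisor. */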
664 static int kvm_get_vpa(CPUState *cs)
666 PowerPCCPU *cpu = POWERPC_CPU(cs);
667 CPUPPCState *env = &cpu->env;
668 struct kvm_one_reg reg;
669 int ret;
671 reg.id = KVM_REG_PPC_VPA_ADDR;
672 reg.addr = (uintptr_t)&env->vpa_addr;
673 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
674 if (ret < 0) {
675 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
676 return ret;
679 assert((uintptr_t)&env->slb_shadow_size
680 == ((uintptr_t)&env->slb_shadow_addr + 8));
681 reg.id = KVM_REG_PPC_VPA_SLB;
682 reg.addr = (uintptr_t)&env->slb_shadow_addr;
683 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
684 if (ret < 0) {
685 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
686 strerror(errno));
687 return ret;
690 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
691 reg.id = KVM_REG_PPC_VPA_DTL;
692 reg.addr = (uintptr_t)&env->dtl_addr;
693 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
694 if (ret < 0) {
695 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
696 strerror(errno));
697 return ret;
700 return 0;
703 static int kvm_put_vpa(CPUState *cs)
705 PowerPCCPU *cpu = POWERPC_CPU(cs);
706 CPUPPCState *env = &cpu->env;
707 struct kvm_one_reg reg;
708 int ret;
710 /* SLB shadow or DTL can't be registered unless a master VPA is
711 * registered. That means when restoring state, if a VPA *is*
712 * registered, we need to set that up first. If not, we need to
713 * deregister the others before deregistering the master VPA */
714 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
716 if (env->vpa_addr) {
717 reg.id = KVM_REG_PPC_VPA_ADDR;
718 reg.addr = (uintptr_t)&env->vpa_addr;
719 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
720 if (ret < 0) {
721 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
722 return ret;
726 assert((uintptr_t)&env->slb_shadow_size
727 == ((uintptr_t)&env->slb_shadow_addr + 8));
728 reg.id = KVM_REG_PPC_VPA_SLB;
729 reg.addr = (uintptr_t)&env->slb_shadow_addr;
730 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
731 if (ret < 0) {
732 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
733 return ret;
736 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
737 reg.id = KVM_REG_PPC_VPA_DTL;
738 reg.addr = (uintptr_t)&env->dtl_addr;
739 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
740 if (ret < 0) {
741 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
742 strerror(errno));
743 return ret;
746 if (!env->vpa_addr) {
747 reg.id = KVM_REG_PPC_VPA_ADDR;
748 reg.addr = (uintptr_t)&env->vpa_addr;
749 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
750 if (ret < 0) {
751 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
752 return ret;
756 return 0;
758 #endif /* TARGET_PPC64 */
760 int kvm_arch_put_registers(CPUState *cs, int level)
762 PowerPCCPU *cpu = POWERPC_CPU(cs);
763 CPUPPCState *env = &cpu->env;
764 struct kvm_regs regs;
765 int ret;
766 int i;
768 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
769 if (ret < 0) {
770 return ret;
773 regs.ctr = env->ctr;
774 regs.lr = env->lr;
775 regs.xer = cpu_read_xer(env);
776 regs.msr = env->msr;
777 regs.pc = env->nip;
779 regs.srr0 = env->spr[SPR_SRR0];
780 regs.srr1 = env->spr[SPR_SRR1];
782 regs.sprg0 = env->spr[SPR_SPRG0];
783 regs.sprg1 = env->spr[SPR_SPRG1];
784 regs.sprg2 = env->spr[SPR_SPRG2];
785 regs.sprg3 = env->spr[SPR_SPRG3];
786 regs.sprg4 = env->spr[SPR_SPRG4];
787 regs.sprg5 = env->spr[SPR_SPRG5];
788 regs.sprg6 = env->spr[SPR_SPRG6];
789 regs.sprg7 = env->spr[SPR_SPRG7];
791 regs.pid = env->spr[SPR_BOOKE_PID];
793 for (i = 0; i < 32; i++)
794 regs.gpr[i] = env->gpr[i];
796 regs.cr = 0;
797 for (i = 0; i < 8; i++) {
798 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
801 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
802 if (ret < 0)
803 return ret;
805 kvm_put_fp(cs);
807 if (env->tlb_dirty) {
808 kvm_sw_tlb_put(cpu);
809 env->tlb_dirty = false;
812 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
813 struct kvm_sregs sregs;
815 sregs.pvr = env->spr[SPR_PVR];
817 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
819 /* Sync SLB */
820 #ifdef TARGET_PPC64
821 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
822 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
823 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
825 #endif
827 /* Sync SRs */
828 for (i = 0; i < 16; i++) {
829 sregs.u.s.ppc32.sr[i] = env->sr[i];
832 /* Sync BATs */
833 for (i = 0; i < 8; i++) {
834 /* Beware. We have to swap the upper and lower 32-bit halves here */
835 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
836 | env->DBAT[1][i];
837 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
838 | env->IBAT[1][i];
841 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
842 if (ret) {
843 return ret;
847 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
848 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
851 if (cap_one_reg) {
852 int i;
854 /* We deliberately ignore errors here, for kernels which have
855 * the ONE_REG calls, but don't support the specific
856 * registers, there's a reasonable chance things will still
857 * work, at least until we try to migrate. */
858 for (i = 0; i < 1024; i++) {
859 uint64_t id = env->spr_cb[i].one_reg_id;
861 if (id != 0) {
862 kvm_put_one_spr(cs, id, i);
866 #ifdef TARGET_PPC64
867 if (cap_papr) {
868 if (kvm_put_vpa(cs) < 0) {
869 DPRINTF("Warning: Unable to set VPA information to KVM\n");
872 #endif /* TARGET_PPC64 */
875 return ret;
878 int kvm_arch_get_registers(CPUState *cs)
880 PowerPCCPU *cpu = POWERPC_CPU(cs);
881 CPUPPCState *env = &cpu->env;
882 struct kvm_regs regs;
883 struct kvm_sregs sregs;
884 uint32_t cr;
885 int i, ret;
887 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
888 if (ret < 0)
889 return ret;
891 cr = regs.cr;
892 for (i = 7; i >= 0; i--) {
893 env->crf[i] = cr & 15;
894 cr >>= 4;
897 env->ctr = regs.ctr;
898 env->lr = regs.lr;
899 cpu_write_xer(env, regs.xer);
900 env->msr = regs.msr;
901 env->nip = regs.pc;
903 env->spr[SPR_SRR0] = regs.srr0;
904 env->spr[SPR_SRR1] = regs.srr1;
906 env->spr[SPR_SPRG0] = regs.sprg0;
907 env->spr[SPR_SPRG1] = regs.sprg1;
908 env->spr[SPR_SPRG2] = regs.sprg2;
909 env->spr[SPR_SPRG3] = regs.sprg3;
910 env->spr[SPR_SPRG4] = regs.sprg4;
911 env->spr[SPR_SPRG5] = regs.sprg5;
912 env->spr[SPR_SPRG6] = regs.sprg6;
913 env->spr[SPR_SPRG7] = regs.sprg7;
915 env->spr[SPR_BOOKE_PID] = regs.pid;
917 for (i = 0; i < 32; i++)
918 env->gpr[i] = regs.gpr[i];
920 kvm_get_fp(cs);
922 if (cap_booke_sregs) {
923 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
924 if (ret < 0) {
925 return ret;
928 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
929 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
930 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
931 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
932 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
933 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
934 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
935 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
936 env->spr[SPR_DECR] = sregs.u.e.dec;
937 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
938 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
939 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
942 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
943 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
944 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
945 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
946 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
947 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
950 if (sregs.u.e.features & KVM_SREGS_E_64) {
951 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
954 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
955 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
958 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
959 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
960 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
961 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
962 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
963 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
964 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
965 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
966 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
967 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
968 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
969 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
970 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
971 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
972 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
973 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
974 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
976 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
977 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
978 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
979 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
982 if (sregs.u.e.features & KVM_SREGS_E_PM) {
983 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
986 if (sregs.u.e.features & KVM_SREGS_E_PC) {
987 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
988 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
992 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
993 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
994 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
995 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
996 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
997 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
998 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
999 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1000 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1001 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1002 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1005 if (sregs.u.e.features & KVM_SREGS_EXP) {
1006 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1009 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1010 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1011 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1014 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1015 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1016 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1017 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1019 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1020 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1021 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1026 if (cap_segstate) {
1027 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1028 if (ret < 0) {
1029 return ret;
1032 ppc_store_sdr1(env, sregs.u.s.sdr1);
1034 /* Sync SLB */
1035 #ifdef TARGET_PPC64
1037 * The packed SLB array we get from KVM_GET_SREGS only contains
1038 * information about valid entries. So we flush our internal
1039 * copy to get rid of stale ones, then put all valid SLB entries
1040 * back in.
1042 memset(env->slb, 0, sizeof(env->slb));
1043 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1044 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1045 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1047 * Only restore valid entries
1049 if (rb & SLB_ESID_V) {
1050 ppc_store_slb(env, rb, rs);
1053 #endif
1055 /* Sync SRs */
1056 for (i = 0; i < 16; i++) {
1057 env->sr[i] = sregs.u.s.ppc32.sr[i];
1060 /* Sync BATs */
1061 for (i = 0; i < 8; i++) {
1062 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1063 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1064 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1065 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1069 if (cap_hior) {
1070 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1073 if (cap_one_reg) {
1074 int i;
1076 /* We deliberately ignore errors here, for kernels which have
1077 * the ONE_REG calls, but don't support the specific
1078 * registers, there's a reasonable chance things will still
1079 * work, at least until we try to migrate. */
1080 for (i = 0; i < 1024; i++) {
1081 uint64_t id = env->spr_cb[i].one_reg_id;
1083 if (id != 0) {
1084 kvm_get_one_spr(cs, id, i);
1088 #ifdef TARGET_PPC64
1089 if (cap_papr) {
1090 if (kvm_get_vpa(cs) < 0) {
1091 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1094 #endif
1097 return 0;
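/* Raise or lower the external interrupt line of a vCPU via
 * KVM_INTERRUPT; a no-op unless KVM has the level/unset IRQ
 * capabilities and the interrupt is PPC_INTERRUPT_EXT. */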
1100 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1102 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1104 if (irq != PPC_INTERRUPT_EXT) {
1105 return 0;
1108 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1109 return 0;
1112 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1114 return 0;
1117 #if defined(TARGET_PPCEMB)
1118 #define PPC_INPUT_INT PPC40x_INPUT_INT
1119 #elif defined(TARGET_PPC64)
1120 #define PPC_INPUT_INT PPC970_INPUT_INT
1121 #else
1122 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1123 #endif
1125 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1127 PowerPCCPU *cpu = POWERPC_CPU(cs);
1128 CPUPPCState *env = &cpu->env;
1129 int r;
1130 unsigned irq;
1132 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1133 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1134 if (!cap_interrupt_level &&
1135 run->ready_for_interrupt_injection &&
1136 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1137 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1139 /* For now KVM disregards the 'irq' argument. However, in the
1140 * future KVM could cache it in-kernel to avoid a heavyweight exit
1141 * when reading the UIC.
1143 irq = KVM_INTERRUPT_SET;
1145 DPRINTF("injected interrupt %d\n", irq);
1146 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1147 if (r < 0) {
1148 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1151 /* Always wake up soon in case the interrupt was level based */
1152 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1153 (get_ticks_per_sec() / 50));
1156 /* We don't know if there are more interrupts pending after this. However,
1157 * the guest will return to userspace in the course of handling this one
1158 * anyways, so we will get a chance to deliver the rest. */
1161 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1165 int kvm_arch_process_async_events(CPUState *cs)
1167 return cs->halted;
1170 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1172 CPUState *cs = CPU(cpu);
1173 CPUPPCState *env = &cpu->env;
1175 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1176 cs->halted = 1;
1177 env->exception_index = EXCP_HLT;
1180 return 0;
1183 /* map dcr access to existing qemu dcr emulation */
1184 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1186 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1187 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1189 return 0;
1192 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1194 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1195 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1197 return 0;
1200 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1202 PowerPCCPU *cpu = POWERPC_CPU(cs);
1203 CPUPPCState *env = &cpu->env;
1204 int ret;
1206 switch (run->exit_reason) {
1207 case KVM_EXIT_DCR:
1208 if (run->dcr.is_write) {
1209 DPRINTF("handle dcr write\n");
1210 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1211 } else {
1212 DPRINTF("handle dcr read\n");
1213 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1215 break;
1216 case KVM_EXIT_HLT:
1217 DPRINTF("handle halt\n");
1218 ret = kvmppc_handle_halt(cpu);
1219 break;
1220 #if defined(TARGET_PPC64)
1221 case KVM_EXIT_PAPR_HCALL:
1222 DPRINTF("handle PAPR hypercall\n");
1223 run->papr_hcall.ret = spapr_hypercall(cpu,
1224 run->papr_hcall.nr,
1225 run->papr_hcall.args);
1226 ret = 0;
1227 break;
1228 #endif
1229 case KVM_EXIT_EPR:
1230 DPRINTF("handle epr\n");
1231 run->epr.epr = ldl_phys(env->mpic_iack);
1232 ret = 0;
1233 break;
1234 case KVM_EXIT_WATCHDOG:
1235 DPRINTF("handle watchdog expiry\n");
1236 watchdog_perform_action();
1237 ret = 0;
1238 break;
1240 default:
1241 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1242 ret = -1;
1243 break;
1246 return ret;
1249 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1251 CPUState *cs = CPU(cpu);
1252 uint32_t bits = tsr_bits;
1253 struct kvm_one_reg reg = {
1254 .id = KVM_REG_PPC_OR_TSR,
1255 .addr = (uintptr_t) &bits,
1258 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1261 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1264 CPUState *cs = CPU(cpu);
1265 uint32_t bits = tsr_bits;
1266 struct kvm_one_reg reg = {
1267 .id = KVM_REG_PPC_CLEAR_TSR,
1268 .addr = (uintptr_t) &bits,
1271 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1274 int kvmppc_set_tcr(PowerPCCPU *cpu)
1276 CPUState *cs = CPU(cpu);
1277 CPUPPCState *env = &cpu->env;
1278 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1280 struct kvm_one_reg reg = {
1281 .id = KVM_REG_PPC_TCR,
1282 .addr = (uintptr_t) &tcr,
1285 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1288 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1290 CPUState *cs = CPU(cpu);
1291 struct kvm_enable_cap encap = {};
1292 int ret;
1294 if (!kvm_enabled()) {
1295 return -1;
1298 if (!cap_ppc_watchdog) {
1299 printf("warning: KVM does not support watchdog");
1300 return -1;
1303 encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1304 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1305 if (ret < 0) {
1306 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1307 __func__, strerror(-ret));
1308 return ret;
1311 return ret;
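/* Find the first /proc/cpuinfo line that starts with @field and copy
 * it into @value; returns 0 on success, -1 otherwise. */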
1314 static int read_cpuinfo(const char *field, char *value, int len)
1316 FILE *f;
1317 int ret = -1;
1318 int field_len = strlen(field);
1319 char line[512];
1321 f = fopen("/proc/cpuinfo", "r");
1322 if (!f) {
1323 return -1;
1326 do {
1327 if (!fgets(line, sizeof(line), f)) {
1328 break;
1330 if (!strncmp(line, field, field_len)) {
1331 pstrcpy(value, len, line);
1332 ret = 0;
1333 break;
1335 } while (*line);
1337 fclose(f);
1339 return ret;
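/* Host timebase frequency, parsed from the "timebase" line of
 * /proc/cpuinfo; falls back to the default tick rate if unavailable. */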
1342 uint32_t kvmppc_get_tbfreq(void)
1344 char line[512];
1345 char *ns;
1346 uint32_t retval = get_ticks_per_sec();
1348 if (read_cpuinfo("timebase", line, sizeof(line))) {
1349 return retval;
1352 if (!(ns = strchr(line, ':'))) {
1353 return retval;
1356 ns++;
1358 retval = atoi(ns);
1359 return retval;
1362 /* Try to find a device tree node for a CPU with clock-frequency property */
1363 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1365 struct dirent *dirp;
1366 DIR *dp;
1368 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1369 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1370 return -1;
1373 buf[0] = '\0';
1374 while ((dirp = readdir(dp)) != NULL) {
1375 FILE *f;
1376 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1377 dirp->d_name);
1378 f = fopen(buf, "r");
1379 if (f) {
1380 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1381 fclose(f);
1382 break;
1384 buf[0] = '\0';
1386 closedir(dp);
1387 if (buf[0] == '\0') {
1388 printf("Unknown host!\n");
1389 return -1;
1392 return 0;
1395 /* Read a CPU node property from the host device tree that's a single
1396 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1397 * (can't find or open the property, or doesn't understand the
1398 * format) */
1399 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1401 char buf[PATH_MAX];
1402 union {
1403 uint32_t v32;
1404 uint64_t v64;
1405 } u;
1406 FILE *f;
1407 int len;
1409 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1410 return -1;
1413 strncat(buf, "/", sizeof(buf) - strlen(buf));
1414 strncat(buf, propname, sizeof(buf) - strlen(buf));
1416 f = fopen(buf, "rb");
1417 if (!f) {
1418 return -1;
1421 len = fread(&u, 1, sizeof(u), f);
1422 fclose(f);
1423 switch (len) {
1424 case 4:
1425 /* property is a 32-bit quantity */
1426 return be32_to_cpu(u.v32);
1427 case 8:
1428 return be64_to_cpu(u.v64);
1431 return 0;
1434 uint64_t kvmppc_get_clockfreq(void)
1436 return kvmppc_read_int_cpu_dt("clock-frequency");
1439 uint32_t kvmppc_get_vmx(void)
1441 return kvmppc_read_int_cpu_dt("ibm,vmx");
1444 uint32_t kvmppc_get_dfp(void)
1446 return kvmppc_read_int_cpu_dt("ibm,dfp");
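/* Query KVM's paravirtualization info (hypercall sequence, idle hint);
 * returns 0 on success, 1 if the capability or ioctl is unavailable. */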
1449 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1451 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1452 CPUState *cs = CPU(cpu);
1454 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1455 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1456 return 0;
1459 return 1;
1462 int kvmppc_get_hasidle(CPUPPCState *env)
1464 struct kvm_ppc_pvinfo pvinfo;
1466 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1467 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1468 return 1;
1471 return 0;
1474 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1476 uint32_t *hc = (uint32_t*)buf;
1477 struct kvm_ppc_pvinfo pvinfo;
1479 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1480 memcpy(buf, pvinfo.hcall, buf_len);
1481 return 0;
1485 * Fallback to always fail hypercalls:
1487 * li r3, -1
1488 * nop
1489 * nop
1490 * nop
1493 hc[0] = 0x3860ffff;
1494 hc[1] = 0x60000000;
1495 hc[2] = 0x60000000;
1496 hc[3] = 0x60000000;
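/* 0x3860ffff is "li r3, -1" (addi r3, 0, -1); 0x60000000 is the
 * canonical PowerPC nop (ori 0, 0, 0). */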
1498 return 0;
1501 void kvmppc_set_papr(PowerPCCPU *cpu)
1503 CPUPPCState *env = &cpu->env;
1504 CPUState *cs = CPU(cpu);
1505 struct kvm_enable_cap cap = {};
1506 int ret;
1508 cap.cap = KVM_CAP_PPC_PAPR;
1509 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1511 if (ret) {
1512 cpu_abort(env, "This KVM version does not support PAPR\n");
1515 /* Update the capability flag so we sync the right information
1516 * with kvm */
1517 cap_papr = 1;
1520 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1522 CPUPPCState *env = &cpu->env;
1523 CPUState *cs = CPU(cpu);
1524 struct kvm_enable_cap cap = {};
1525 int ret;
1527 cap.cap = KVM_CAP_PPC_EPR;
1528 cap.args[0] = mpic_proxy;
1529 ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1531 if (ret && mpic_proxy) {
1532 cpu_abort(env, "This KVM version does not support EPR\n");
1536 int kvmppc_smt_threads(void)
1538 return cap_ppc_smt ? cap_ppc_smt : 1;
1541 #ifdef TARGET_PPC64
1542 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1544 void *rma;
1545 off_t size;
1546 int fd;
1547 struct kvm_allocate_rma ret;
1548 MemoryRegion *rma_region;
1550 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1551 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1552 * not necessary on this hardware
1553 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1555 * FIXME: We should allow the user to force contiguous RMA
1556 * allocation in the cap_ppc_rma==1 case.
1558 if (cap_ppc_rma < 2) {
1559 return 0;
1562 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1563 if (fd < 0) {
1564 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1565 strerror(errno));
1566 return -1;
1569 size = MIN(ret.rma_size, 256ul << 20);
1571 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1572 if (rma == MAP_FAILED) {
1573 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1574 return -1;
1577 rma_region = g_new(MemoryRegion, 1);
1578 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1579 vmstate_register_ram_global(rma_region);
1580 memory_region_add_subregion(sysmem, 0, rma_region);
1582 return size;
1585 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1587 struct kvm_ppc_smmu_info info;
1588 long rampagesize, best_page_shift;
1589 int i;
1591 if (cap_ppc_rma >= 2) {
1592 return current_size;
1595 /* Find the largest hardware supported page size that's less than
1596 * or equal to the (logical) backing page size of guest RAM */
1597 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1598 rampagesize = getrampagesize();
1599 best_page_shift = 0;
1601 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1602 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1604 if (!sps->page_shift) {
1605 continue;
1608 if ((sps->page_shift > best_page_shift)
1609 && ((1UL << sps->page_shift) <= rampagesize)) {
1610 best_page_shift = sps->page_shift;
1614 return MIN(current_size,
1615 1ULL << (best_page_shift + hash_shift - 7));
1617 #endif
1619 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1621 struct kvm_create_spapr_tce args = {
1622 .liobn = liobn,
1623 .window_size = window_size,
1625 long len;
1626 int fd;
1627 void *table;
1629 /* Must set fd to -1 so we don't try to munmap when called for
1630 * destroying the table, which the upper layers -will- do
1632 *pfd = -1;
1633 if (!cap_spapr_tce) {
1634 return NULL;
1637 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1638 if (fd < 0) {
1639 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1640 liobn);
1641 return NULL;
1644 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1645 /* FIXME: round this up to page size */
1647 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1648 if (table == MAP_FAILED) {
1649 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1650 liobn);
1651 close(fd);
1652 return NULL;
1655 *pfd = fd;
1656 return table;
1659 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1661 long len;
1663 if (fd < 0) {
1664 return -1;
1667 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1668 if ((munmap(table, len) < 0) ||
1669 (close(fd) < 0)) {
1670 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1671 strerror(errno));
1672 /* Leak the table */
1675 return 0;
1678 int kvmppc_reset_htab(int shift_hint)
1680 uint32_t shift = shift_hint;
1682 if (!kvm_enabled()) {
1683 /* Full emulation, tell caller to allocate htab itself */
1684 return 0;
1686 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1687 int ret;
1688 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1689 if (ret == -ENOTTY) {
1690 /* At least some versions of PR KVM advertise the
1691 * capability, but don't implement the ioctl(). Oops.
1692 * Return 0 so that we allocate the htab in qemu, as is
1693 * correct for PR. */
1694 return 0;
1695 } else if (ret < 0) {
1696 return ret;
1698 return shift;
1701 /* We have a kernel that predates the htab reset calls. For PR
1702 * KVM we need to allocate the htab ourselves; an HV KVM of
1703 * this era will have allocated a 16MB fixed-size hash table
1704 * already. Kernels of this era have the GET_PVINFO capability
1705 * only on PR, so we use this hack to determine the right
1706 * answer */
1707 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1708 /* PR - tell caller to allocate htab */
1709 return 0;
1710 } else {
1711 /* HV - assume 16MB kernel allocated htab */
1712 return 24;
1716 static inline uint32_t mfpvr(void)
1718 uint32_t pvr;
1720 asm ("mfpvr %0"
1721 : "=r"(pvr));
1722 return pvr;
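/* Set or clear feature bits in one of the PowerPCCPUClass instruction
 * flag words. */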
1725 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1727 if (on) {
1728 *word |= flags;
1729 } else {
1730 *word &= ~flags;
1734 static void kvmppc_host_cpu_initfn(Object *obj)
1736 assert(kvm_enabled());
1739 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1741 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1742 uint32_t vmx = kvmppc_get_vmx();
1743 uint32_t dfp = kvmppc_get_dfp();
1744 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1745 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1747 /* Now fix up the class with information we can query from the host */
1748 pcc->pvr = mfpvr();
1750 if (vmx != -1) {
1751 /* Only override when we know what the host supports */
1752 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1753 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1755 if (dfp != -1) {
1756 /* Only override when we know what the host supports */
1757 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1760 if (dcache_size != -1) {
1761 pcc->l1_dcache_size = dcache_size;
1764 if (icache_size != -1) {
1765 pcc->l1_icache_size = icache_size;
1769 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1771 CPUState *cs = CPU(cpu);
1772 int smt;
1774 /* Adjust cpu index for SMT */
1775 smt = kvmppc_smt_threads();
1776 cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1777 + (cs->cpu_index % smp_threads);
1779 return 0;
1782 bool kvmppc_has_cap_epr(void)
1784 return cap_epr;
1787 static int kvm_ppc_register_host_cpu_type(void)
1789 TypeInfo type_info = {
1790 .name = TYPE_HOST_POWERPC_CPU,
1791 .instance_init = kvmppc_host_cpu_initfn,
1792 .class_init = kvmppc_host_cpu_class_init,
1794 uint32_t host_pvr = mfpvr();
1795 PowerPCCPUClass *pvr_pcc;
1797 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1798 if (pvr_pcc == NULL) {
1799 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1801 if (pvr_pcc == NULL) {
1802 return -1;
1804 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1805 type_register(&type_info);
1806 return 0;
1809 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1811 struct kvm_rtas_token_args args = {
1812 .token = token,
1815 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1816 return -ENOENT;
1819 strncpy(args.name, function, sizeof(args.name));
1821 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1824 int kvmppc_get_htab_fd(bool write)
1826 struct kvm_get_htab_fd s = {
1827 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1828 .start_index = 0,
1831 if (!cap_htab_fd) {
1832 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1833 return -1;
1836 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
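/* Stream hash page table chunks from the KVM HTAB fd into the
 * migration stream, stopping once max_ns has elapsed (when
 * non-negative); returns 1 when the whole table has been read. */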
1839 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1841 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1842 uint8_t buf[bufsize];
1843 ssize_t rc;
1845 do {
1846 rc = read(fd, buf, bufsize);
1847 if (rc < 0) {
1848 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1849 strerror(errno));
1850 return rc;
1851 } else if (rc) {
1852 /* Kernel already returns data in BE format for the file */
1853 qemu_put_buffer(f, buf, rc);
1855 } while ((rc != 0)
1856 && ((max_ns < 0)
1857 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1859 return (rc == 0) ? 1 : 0;
1862 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1863 uint16_t n_valid, uint16_t n_invalid)
1865 struct kvm_get_htab_header *buf;
1866 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1867 ssize_t rc;
1869 buf = alloca(chunksize);
1870 /* This is KVM on ppc, so this is all big-endian */
1871 buf->index = index;
1872 buf->n_valid = n_valid;
1873 buf->n_invalid = n_invalid;
1875 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1877 rc = write(fd, buf, chunksize);
1878 if (rc < 0) {
1879 fprintf(stderr, "Error writing KVM hash table: %s\n",
1880 strerror(errno));
1881 return rc;
1883 if (rc != chunksize) {
1884 /* We should never get a short write on a single chunk */
1885 fprintf(stderr, "Short write, restoring KVM hash table\n");
1886 return -1;
1888 return 0;
1891 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1893 return true;
1896 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1898 return 1;
1901 int kvm_arch_on_sigbus(int code, void *addr)
1903 return 1;
1906 void kvm_arch_init_irq_routing(KVMState *s)
1910 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1912 return -EINVAL;
1915 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1917 return -EINVAL;
1920 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1922 return -EINVAL;
1925 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1927 return -EINVAL;
1930 void kvm_arch_remove_all_hw_breakpoints(void)
1934 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)