PPC: SPE: Fix high-bits bitmask
[qemu.git] / target-ppc / kvm.c
blob e6a1625b6b31620c53e1c247d8300a6c05e27ddd
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
42 //#define DEBUG_KVM
44 #ifdef DEBUG_KVM
45 #define DPRINTF(fmt, ...) \
46 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
47 #else
48 #define DPRINTF(fmt, ...) \
49 do { } while (0)
50 #endif
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
55 KVM_CAP_LAST_INFO
58 static int cap_interrupt_unset = false;
59 static int cap_interrupt_level = false;
60 static int cap_segstate;
61 static int cap_booke_sregs;
62 static int cap_ppc_smt;
63 static int cap_ppc_rma;
64 static int cap_spapr_tce;
65 static int cap_spapr_multitce;
66 static int cap_hior;
67 static int cap_one_reg;
68 static int cap_epr;
69 static int cap_ppc_watchdog;
70 static int cap_papr;
71 static int cap_htab_fd;
73 /* XXX We have a race condition where we actually have a level-triggered
74 * interrupt, but the infrastructure can't expose that yet, so the guest
75 * takes but ignores it, goes to sleep and never gets notified that there's
76 * still an interrupt pending.
78 * As a quick workaround, let's just wake up again 20 ms after we injected
79 * an interrupt. That way we can ensure that we're always reinjecting
80 * interrupts in case the guest swallowed them.
82 static QEMUTimer *idle_timer;
84 static void kvm_kick_cpu(void *opaque)
86 PowerPCCPU *cpu = opaque;
88 qemu_cpu_kick(CPU(cpu));
91 static int kvm_ppc_register_host_cpu_type(void);
93 int kvm_arch_init(KVMState *s)
95 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
96 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
97 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
98 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
99 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
100 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
101 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
102 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
103 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
104 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
105 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
106 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
107 /* Note: we don't set cap_papr here, because this capability is
108 * only activated later, by kvmppc_set_papr() */
109 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
111 if (!cap_interrupt_level) {
112 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
113 "VM to stall at times!\n");
116 kvm_ppc_register_host_cpu_type();
118 return 0;
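/* Tell KVM which PVR the guest should see. On BookE we keep the host's
 * native PVR; on BookS we push env->spr[SPR_PVR] through the SREGS
 * interface, which requires the segstate capability. */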
121 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
123 CPUPPCState *cenv = &cpu->env;
124 CPUState *cs = CPU(cpu);
125 struct kvm_sregs sregs;
126 int ret;
128 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
129 /* What we're really trying to say is "if we're on BookE, we use
130 the native PVR for now". This is the only sane way to check
131 it though, so we may mislead users into thinking they can run
132 BookE guests on BookS. Let's hope nobody dares enough :) */
133 return 0;
134 } else {
135 if (!cap_segstate) {
136 fprintf(stderr, "kvm error: missing PVR setting capability\n");
137 return -ENOSYS;
141 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
142 if (ret) {
143 return ret;
146 sregs.pvr = cenv->spr[SPR_PVR];
147 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
150 /* Set up a shared TLB array with KVM */
151 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
153 CPUPPCState *env = &cpu->env;
154 CPUState *cs = CPU(cpu);
155 struct kvm_book3e_206_tlb_params params = {};
156 struct kvm_config_tlb cfg = {};
157 unsigned int entries = 0;
158 int ret, i;
160 if (!kvm_enabled() ||
161 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
162 return 0;
165 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
167 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
168 params.tlb_sizes[i] = booke206_tlb_size(env, i);
169 params.tlb_ways[i] = booke206_tlb_ways(env, i);
170 entries += params.tlb_sizes[i];
173 assert(entries == env->nb_tlb);
174 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
176 env->tlb_dirty = true;
178 cfg.array = (uintptr_t)env->tlb.tlbm;
179 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
180 cfg.params = (uintptr_t)&params;
181 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
183 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
184 if (ret < 0) {
185 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
186 __func__, strerror(-ret));
187 return ret;
190 env->kvm_sw_tlb = true;
191 return 0;
195 #if defined(TARGET_PPC64)
196 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
197 struct kvm_ppc_smmu_info *info)
199 CPUPPCState *env = &cpu->env;
200 CPUState *cs = CPU(cpu);
202 memset(info, 0, sizeof(*info));
204 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
205 * we need to "guess" what the supported page sizes are.
207 * For that to work we make a few assumptions:
209 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
210 * KVM which only supports 4K and 16M pages, but supports them
211 * regardless of the backing store characteristics. We also don't
212 * support 1T segments.
214 * This is safe as if HV KVM ever supports that capability or PR
215 * KVM grows support for more page/segment sizes, those versions
216 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
217 * will not hit this fallback
219 * - Else we are running HV KVM. This means we only support page
220 * sizes that fit in the backing store. Additionally we only
221 * advertise 64K pages if the processor is ARCH 2.06 and we assume
222 * P7 encodings for the SLB and hash table. Here too, we assume
223 * support for any newer processor will mean a kernel that
224 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
225 * this fallback.
227 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
228 /* No flags */
229 info->flags = 0;
230 info->slb_size = 64;
232 /* Standard 4k base page size segment */
233 info->sps[0].page_shift = 12;
234 info->sps[0].slb_enc = 0;
235 info->sps[0].enc[0].page_shift = 12;
236 info->sps[0].enc[0].pte_enc = 0;
238 /* Standard 16M large page size segment */
239 info->sps[1].page_shift = 24;
240 info->sps[1].slb_enc = SLB_VSID_L;
241 info->sps[1].enc[0].page_shift = 24;
242 info->sps[1].enc[0].pte_enc = 0;
243 } else {
244 int i = 0;
246 /* HV KVM has backing store size restrictions */
247 info->flags = KVM_PPC_PAGE_SIZES_REAL;
249 if (env->mmu_model & POWERPC_MMU_1TSEG) {
250 info->flags |= KVM_PPC_1T_SEGMENTS;
253 if (env->mmu_model == POWERPC_MMU_2_06) {
254 info->slb_size = 32;
255 } else {
256 info->slb_size = 64;
259 /* Standard 4k base page size segment */
260 info->sps[i].page_shift = 12;
261 info->sps[i].slb_enc = 0;
262 info->sps[i].enc[0].page_shift = 12;
263 info->sps[i].enc[0].pte_enc = 0;
264 i++;
266 /* 64K on MMU 2.06 */
267 if (env->mmu_model == POWERPC_MMU_2_06) {
268 info->sps[i].page_shift = 16;
269 info->sps[i].slb_enc = 0x110;
270 info->sps[i].enc[0].page_shift = 16;
271 info->sps[i].enc[0].pte_enc = 1;
272 i++;
275 /* Standard 16M large page size segment */
276 info->sps[i].page_shift = 24;
277 info->sps[i].slb_enc = SLB_VSID_L;
278 info->sps[i].enc[0].page_shift = 24;
279 info->sps[i].enc[0].pte_enc = 0;
283 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
285 CPUState *cs = CPU(cpu);
286 int ret;
288 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
289 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
290 if (ret == 0) {
291 return;
295 kvm_get_fallback_smmu_info(cpu, info);
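/* Determine the page size backing guest RAM: the normal host page size
 * for anonymous memory, or the filesystem block size when -mem-path
 * points at a hugetlbfs mount. */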
298 static long getrampagesize(void)
300 struct statfs fs;
301 int ret;
303 if (!mem_path) {
304 /* guest RAM is backed by normal anonymous pages */
305 return getpagesize();
308 do {
309 ret = statfs(mem_path, &fs);
310 } while (ret != 0 && errno == EINTR);
312 if (ret != 0) {
313 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
314 strerror(errno));
315 exit(1);
318 #define HUGETLBFS_MAGIC 0x958458f6
320 if (fs.f_type != HUGETLBFS_MAGIC) {
321 /* Explicit mempath, but it's ordinary pages */
322 return getpagesize();
325 /* It's hugetlbfs; return the huge page size */
326 return fs.f_bsize;
329 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
331 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
332 return true;
335 return (1ul << shift) <= rampgsize;
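/* Filter the segment/page sizes advertised by KVM down to those usable
 * with the current RAM backing page size and copy the result into the
 * QEMU-side env->sps table. */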
338 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
340 static struct kvm_ppc_smmu_info smmu_info;
341 static bool has_smmu_info;
342 CPUPPCState *env = &cpu->env;
343 long rampagesize;
344 int iq, ik, jq, jk;
346 /* We only handle page sizes for 64-bit server guests for now */
347 if (!(env->mmu_model & POWERPC_MMU_64)) {
348 return;
351 /* Collect MMU info from kernel if not already */
352 if (!has_smmu_info) {
353 kvm_get_smmu_info(cpu, &smmu_info);
354 has_smmu_info = true;
357 rampagesize = getrampagesize();
359 /* Convert to QEMU form */
360 memset(&env->sps, 0, sizeof(env->sps));
363 * XXX This loop should be an entry wide AND of the capabilities that
364 * the selected CPU has with the capabilities that KVM supports.
366 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
367 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
368 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
370 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
371 ksps->page_shift)) {
372 continue;
374 qsps->page_shift = ksps->page_shift;
375 qsps->slb_enc = ksps->slb_enc;
376 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
377 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
378 ksps->enc[jk].page_shift)) {
379 continue;
381 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
382 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
383 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
384 break;
387 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
388 break;
391 env->slb_nr = smmu_info.slb_size;
392 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
393 env->mmu_model &= ~POWERPC_MMU_1TSEG;
396 #else /* defined (TARGET_PPC64) */
398 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
402 #endif /* !defined (TARGET_PPC64) */
404 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
406 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
409 int kvm_arch_init_vcpu(CPUState *cs)
411 PowerPCCPU *cpu = POWERPC_CPU(cs);
412 CPUPPCState *cenv = &cpu->env;
413 int ret;
415 /* Gather server mmu info from KVM and update the CPU state */
416 kvm_fixup_page_sizes(cpu);
418 /* Synchronize sregs with kvm */
419 ret = kvm_arch_sync_sregs(cpu);
420 if (ret) {
421 return ret;
424 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
426 /* Some targets support access to KVM's guest TLB. */
427 switch (cenv->mmu_model) {
428 case POWERPC_MMU_BOOKE206:
429 ret = kvm_booke206_tlb_init(cpu);
430 break;
431 default:
432 break;
435 return ret;
438 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
440 CPUPPCState *env = &cpu->env;
441 CPUState *cs = CPU(cpu);
442 struct kvm_dirty_tlb dirty_tlb;
443 unsigned char *bitmap;
444 int ret;
446 if (!env->kvm_sw_tlb) {
447 return;
450 bitmap = g_malloc((env->nb_tlb + 7) / 8);
451 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
453 dirty_tlb.bitmap = (uintptr_t)bitmap;
454 dirty_tlb.num_dirty = env->nb_tlb;
456 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
457 if (ret) {
458 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
459 __func__, strerror(-ret));
462 g_free(bitmap);
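/* Read a single SPR from KVM via KVM_GET_ONE_REG; the transfer width
 * (32 or 64 bit) comes from the size field encoded in the ONE_REG id. */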
465 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
467 PowerPCCPU *cpu = POWERPC_CPU(cs);
468 CPUPPCState *env = &cpu->env;
469 union {
470 uint32_t u32;
471 uint64_t u64;
472 } val;
473 struct kvm_one_reg reg = {
474 .id = id,
475 .addr = (uintptr_t) &val,
477 int ret;
479 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
480 if (ret != 0) {
481 trace_kvm_failed_spr_get(spr, strerror(errno));
482 } else {
483 switch (id & KVM_REG_SIZE_MASK) {
484 case KVM_REG_SIZE_U32:
485 env->spr[spr] = val.u32;
486 break;
488 case KVM_REG_SIZE_U64:
489 env->spr[spr] = val.u64;
490 break;
492 default:
493 /* Don't handle this size yet */
494 abort();
499 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
501 PowerPCCPU *cpu = POWERPC_CPU(cs);
502 CPUPPCState *env = &cpu->env;
503 union {
504 uint32_t u32;
505 uint64_t u64;
506 } val;
507 struct kvm_one_reg reg = {
508 .id = id,
509 .addr = (uintptr_t) &val,
511 int ret;
513 switch (id & KVM_REG_SIZE_MASK) {
514 case KVM_REG_SIZE_U32:
515 val.u32 = env->spr[spr];
516 break;
518 case KVM_REG_SIZE_U64:
519 val.u64 = env->spr[spr];
520 break;
522 default:
523 /* Don't handle this size yet */
524 abort();
527 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
528 if (ret != 0) {
529 trace_kvm_failed_spr_set(spr, strerror(errno));
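/* Push the guest FP/VSX and Altivec state to KVM through ONE_REG. With
 * VSX each register is transferred as a two-doubleword VSR; without it,
 * only the FPR doubleword is written. */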
533 static int kvm_put_fp(CPUState *cs)
535 PowerPCCPU *cpu = POWERPC_CPU(cs);
536 CPUPPCState *env = &cpu->env;
537 struct kvm_one_reg reg;
538 int i;
539 int ret;
541 if (env->insns_flags & PPC_FLOAT) {
542 uint64_t fpscr = env->fpscr;
543 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
545 reg.id = KVM_REG_PPC_FPSCR;
546 reg.addr = (uintptr_t)&fpscr;
547 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
548 if (ret < 0) {
549 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
550 return ret;
553 for (i = 0; i < 32; i++) {
554 uint64_t vsr[2];
556 vsr[0] = float64_val(env->fpr[i]);
557 vsr[1] = env->vsr[i];
558 reg.addr = (uintptr_t) &vsr;
559 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
561 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
562 if (ret < 0) {
563 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
564 i, strerror(errno));
565 return ret;
570 if (env->insns_flags & PPC_ALTIVEC) {
571 reg.id = KVM_REG_PPC_VSCR;
572 reg.addr = (uintptr_t)&env->vscr;
573 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
574 if (ret < 0) {
575 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
576 return ret;
579 for (i = 0; i < 32; i++) {
580 reg.id = KVM_REG_PPC_VR(i);
581 reg.addr = (uintptr_t)&env->avr[i];
582 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
583 if (ret < 0) {
584 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
585 return ret;
590 return 0;
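/* Fetch the guest FP/VSX and Altivec state from KVM; the mirror image of
 * kvm_put_fp() above. */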
593 static int kvm_get_fp(CPUState *cs)
595 PowerPCCPU *cpu = POWERPC_CPU(cs);
596 CPUPPCState *env = &cpu->env;
597 struct kvm_one_reg reg;
598 int i;
599 int ret;
601 if (env->insns_flags & PPC_FLOAT) {
602 uint64_t fpscr;
603 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
605 reg.id = KVM_REG_PPC_FPSCR;
606 reg.addr = (uintptr_t)&fpscr;
607 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
608 if (ret < 0) {
609 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
610 return ret;
611 } else {
612 env->fpscr = fpscr;
615 for (i = 0; i < 32; i++) {
616 uint64_t vsr[2];
618 reg.addr = (uintptr_t) &vsr;
619 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
621 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
622 if (ret < 0) {
623 DPRINTF("Unable to get %s%d from KVM: %s\n",
624 vsx ? "VSR" : "FPR", i, strerror(errno));
625 return ret;
626 } else {
627 env->fpr[i] = vsr[0];
628 if (vsx) {
629 env->vsr[i] = vsr[1];
635 if (env->insns_flags & PPC_ALTIVEC) {
636 reg.id = KVM_REG_PPC_VSCR;
637 reg.addr = (uintptr_t)&env->vscr;
638 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
639 if (ret < 0) {
640 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
641 return ret;
644 for (i = 0; i < 32; i++) {
645 reg.id = KVM_REG_PPC_VR(i);
646 reg.addr = (uintptr_t)&env->avr[i];
647 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
648 if (ret < 0) {
649 DPRINTF("Unable to get VR%d from KVM: %s\n",
650 i, strerror(errno));
651 return ret;
656 return 0;
659 #if defined(TARGET_PPC64)
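/* Read the PAPR VPA, SLB shadow and dispatch trace log registrations from
 * KVM. Each (addr, size) pair must sit back to back in CPUPPCState, as the
 * asserts below check, because it is passed through a single ONE_REG access. */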
660 static int kvm_get_vpa(CPUState *cs)
662 PowerPCCPU *cpu = POWERPC_CPU(cs);
663 CPUPPCState *env = &cpu->env;
664 struct kvm_one_reg reg;
665 int ret;
667 reg.id = KVM_REG_PPC_VPA_ADDR;
668 reg.addr = (uintptr_t)&env->vpa_addr;
669 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
670 if (ret < 0) {
671 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
672 return ret;
675 assert((uintptr_t)&env->slb_shadow_size
676 == ((uintptr_t)&env->slb_shadow_addr + 8));
677 reg.id = KVM_REG_PPC_VPA_SLB;
678 reg.addr = (uintptr_t)&env->slb_shadow_addr;
679 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
680 if (ret < 0) {
681 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
682 strerror(errno));
683 return ret;
686 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
687 reg.id = KVM_REG_PPC_VPA_DTL;
688 reg.addr = (uintptr_t)&env->dtl_addr;
689 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
690 if (ret < 0) {
691 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
692 strerror(errno));
693 return ret;
696 return 0;
699 static int kvm_put_vpa(CPUState *cs)
701 PowerPCCPU *cpu = POWERPC_CPU(cs);
702 CPUPPCState *env = &cpu->env;
703 struct kvm_one_reg reg;
704 int ret;
706 /* SLB shadow or DTL can't be registered unless a master VPA is
707 * registered. That means when restoring state, if a VPA *is*
708 * registered, we need to set that up first. If not, we need to
709 * deregister the others before deregistering the master VPA */
710 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
712 if (env->vpa_addr) {
713 reg.id = KVM_REG_PPC_VPA_ADDR;
714 reg.addr = (uintptr_t)&env->vpa_addr;
715 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
716 if (ret < 0) {
717 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
718 return ret;
722 assert((uintptr_t)&env->slb_shadow_size
723 == ((uintptr_t)&env->slb_shadow_addr + 8));
724 reg.id = KVM_REG_PPC_VPA_SLB;
725 reg.addr = (uintptr_t)&env->slb_shadow_addr;
726 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
727 if (ret < 0) {
728 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
729 return ret;
732 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
733 reg.id = KVM_REG_PPC_VPA_DTL;
734 reg.addr = (uintptr_t)&env->dtl_addr;
735 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
736 if (ret < 0) {
737 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
738 strerror(errno));
739 return ret;
742 if (!env->vpa_addr) {
743 reg.id = KVM_REG_PPC_VPA_ADDR;
744 reg.addr = (uintptr_t)&env->vpa_addr;
745 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
746 if (ret < 0) {
747 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
748 return ret;
752 return 0;
754 #endif /* TARGET_PPC64 */
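/* Push the QEMU-side vCPU state into KVM: GPRs, CR, core SPRs and FP/vector
 * state always; segment registers, SLB, BATs and HIOR only when 'level'
 * requests reset-level state; per-SPR ONE_REG values, the timebase offset
 * and (for PAPR guests) the VPA state when the kernel supports them. */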
756 int kvm_arch_put_registers(CPUState *cs, int level)
758 PowerPCCPU *cpu = POWERPC_CPU(cs);
759 CPUPPCState *env = &cpu->env;
760 struct kvm_regs regs;
761 int ret;
762 int i;
764 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
765 if (ret < 0) {
766 return ret;
769 regs.ctr = env->ctr;
770 regs.lr = env->lr;
771 regs.xer = cpu_read_xer(env);
772 regs.msr = env->msr;
773 regs.pc = env->nip;
775 regs.srr0 = env->spr[SPR_SRR0];
776 regs.srr1 = env->spr[SPR_SRR1];
778 regs.sprg0 = env->spr[SPR_SPRG0];
779 regs.sprg1 = env->spr[SPR_SPRG1];
780 regs.sprg2 = env->spr[SPR_SPRG2];
781 regs.sprg3 = env->spr[SPR_SPRG3];
782 regs.sprg4 = env->spr[SPR_SPRG4];
783 regs.sprg5 = env->spr[SPR_SPRG5];
784 regs.sprg6 = env->spr[SPR_SPRG6];
785 regs.sprg7 = env->spr[SPR_SPRG7];
787 regs.pid = env->spr[SPR_BOOKE_PID];
789 for (i = 0; i < 32; i++)
790 regs.gpr[i] = env->gpr[i];
792 regs.cr = 0;
793 for (i = 0; i < 8; i++) {
794 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
797 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
798 if (ret < 0)
799 return ret;
801 kvm_put_fp(cs);
803 if (env->tlb_dirty) {
804 kvm_sw_tlb_put(cpu);
805 env->tlb_dirty = false;
808 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
809 struct kvm_sregs sregs;
811 sregs.pvr = env->spr[SPR_PVR];
813 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
815 /* Sync SLB */
816 #ifdef TARGET_PPC64
817 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
818 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
819 if (env->slb[i].esid & SLB_ESID_V) {
820 sregs.u.s.ppc64.slb[i].slbe |= i;
822 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
824 #endif
826 /* Sync SRs */
827 for (i = 0; i < 16; i++) {
828 sregs.u.s.ppc32.sr[i] = env->sr[i];
831 /* Sync BATs */
832 for (i = 0; i < 8; i++) {
833 /* Beware. We have to swap upper and lower bits here */
834 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
835 | env->DBAT[1][i];
836 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
837 | env->IBAT[1][i];
840 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
841 if (ret) {
842 return ret;
846 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
847 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
850 if (cap_one_reg) {
851 int i;
853 /* We deliberately ignore errors here: for kernels which have
854 * the ONE_REG calls but don't support the specific
855 * registers, there's a reasonable chance things will still
856 * work, at least until we try to migrate. */
857 for (i = 0; i < 1024; i++) {
858 uint64_t id = env->spr_cb[i].one_reg_id;
860 if (id != 0) {
861 kvm_put_one_spr(cs, id, i);
865 #ifdef TARGET_PPC64
866 if (cap_papr) {
867 if (kvm_put_vpa(cs) < 0) {
868 DPRINTF("Warning: Unable to set VPA information to KVM\n");
872 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
873 #endif /* TARGET_PPC64 */
876 return ret;
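/* Mirror image of kvm_arch_put_registers(): pull the vCPU state out of KVM
 * and update CPUPPCState, using the BookE or BookS sregs layout depending
 * on which capability the kernel exposes. */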
879 int kvm_arch_get_registers(CPUState *cs)
881 PowerPCCPU *cpu = POWERPC_CPU(cs);
882 CPUPPCState *env = &cpu->env;
883 struct kvm_regs regs;
884 struct kvm_sregs sregs;
885 uint32_t cr;
886 int i, ret;
888 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
889 if (ret < 0)
890 return ret;
892 cr = regs.cr;
893 for (i = 7; i >= 0; i--) {
894 env->crf[i] = cr & 15;
895 cr >>= 4;
898 env->ctr = regs.ctr;
899 env->lr = regs.lr;
900 cpu_write_xer(env, regs.xer);
901 env->msr = regs.msr;
902 env->nip = regs.pc;
904 env->spr[SPR_SRR0] = regs.srr0;
905 env->spr[SPR_SRR1] = regs.srr1;
907 env->spr[SPR_SPRG0] = regs.sprg0;
908 env->spr[SPR_SPRG1] = regs.sprg1;
909 env->spr[SPR_SPRG2] = regs.sprg2;
910 env->spr[SPR_SPRG3] = regs.sprg3;
911 env->spr[SPR_SPRG4] = regs.sprg4;
912 env->spr[SPR_SPRG5] = regs.sprg5;
913 env->spr[SPR_SPRG6] = regs.sprg6;
914 env->spr[SPR_SPRG7] = regs.sprg7;
916 env->spr[SPR_BOOKE_PID] = regs.pid;
918 for (i = 0; i < 32; i++)
919 env->gpr[i] = regs.gpr[i];
921 kvm_get_fp(cs);
923 if (cap_booke_sregs) {
924 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
925 if (ret < 0) {
926 return ret;
929 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
930 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
931 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
932 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
933 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
934 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
935 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
936 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
937 env->spr[SPR_DECR] = sregs.u.e.dec;
938 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
939 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
940 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
943 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
944 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
945 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
946 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
947 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
948 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
951 if (sregs.u.e.features & KVM_SREGS_E_64) {
952 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
955 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
956 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
959 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
960 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
961 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
962 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
963 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
964 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
965 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
966 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
967 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
968 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
969 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
970 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
971 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
972 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
973 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
974 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
975 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
977 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
978 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
979 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
980 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
983 if (sregs.u.e.features & KVM_SREGS_E_PM) {
984 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
987 if (sregs.u.e.features & KVM_SREGS_E_PC) {
988 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
989 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
993 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
994 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
995 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
996 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
997 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
998 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
999 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1000 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1001 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1002 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1003 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1006 if (sregs.u.e.features & KVM_SREGS_EXP) {
1007 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1010 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1011 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1012 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1015 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1016 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1017 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1018 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1020 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1021 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1022 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1027 if (cap_segstate) {
1028 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1029 if (ret < 0) {
1030 return ret;
1033 if (!env->external_htab) {
1034 ppc_store_sdr1(env, sregs.u.s.sdr1);
1037 /* Sync SLB */
1038 #ifdef TARGET_PPC64
1040 * The packed SLB array we get from KVM_GET_SREGS only contains
1041 * information about valid entries. So we flush our internal
1042 * copy to get rid of stale ones, then put all valid SLB entries
1043 * back in.
1045 memset(env->slb, 0, sizeof(env->slb));
1046 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1047 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1048 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1050 * Only restore valid entries
1052 if (rb & SLB_ESID_V) {
1053 ppc_store_slb(env, rb, rs);
1056 #endif
1058 /* Sync SRs */
1059 for (i = 0; i < 16; i++) {
1060 env->sr[i] = sregs.u.s.ppc32.sr[i];
1063 /* Sync BATs */
1064 for (i = 0; i < 8; i++) {
1065 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1066 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1067 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1068 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1072 if (cap_hior) {
1073 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1076 if (cap_one_reg) {
1077 int i;
1079 /* We deliberately ignore errors here: for kernels which have
1080 * the ONE_REG calls but don't support the specific
1081 * registers, there's a reasonable chance things will still
1082 * work, at least until we try to migrate. */
1083 for (i = 0; i < 1024; i++) {
1084 uint64_t id = env->spr_cb[i].one_reg_id;
1086 if (id != 0) {
1087 kvm_get_one_spr(cs, id, i);
1091 #ifdef TARGET_PPC64
1092 if (cap_papr) {
1093 if (kvm_get_vpa(cs) < 0) {
1094 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1098 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1099 #endif
1102 return 0;
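/* Raise or lower the external interrupt line of a vCPU using the
 * level-triggered KVM_INTERRUPT interface. Only PPC_INTERRUPT_EXT is
 * handled here; without the unset/level capabilities this is a no-op. */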
1105 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1107 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1109 if (irq != PPC_INTERRUPT_EXT) {
1110 return 0;
1113 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1114 return 0;
1117 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1119 return 0;
1122 #if defined(TARGET_PPCEMB)
1123 #define PPC_INPUT_INT PPC40x_INPUT_INT
1124 #elif defined(TARGET_PPC64)
1125 #define PPC_INPUT_INT PPC970_INPUT_INT
1126 #else
1127 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1128 #endif
1130 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1132 PowerPCCPU *cpu = POWERPC_CPU(cs);
1133 CPUPPCState *env = &cpu->env;
1134 int r;
1135 unsigned irq;
1137 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1138 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1139 if (!cap_interrupt_level &&
1140 run->ready_for_interrupt_injection &&
1141 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1142 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1144 /* For now KVM disregards the 'irq' argument. However, in the
1145 * future KVM could cache it in-kernel to avoid a heavyweight exit
1146 * when reading the UIC.
1148 irq = KVM_INTERRUPT_SET;
1150 DPRINTF("injected interrupt %d\n", irq);
1151 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1152 if (r < 0) {
1153 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1156 /* Always wake up soon in case the interrupt was level based */
1157 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1158 (get_ticks_per_sec() / 50));
1161 /* We don't know if there are more interrupts pending after this. However,
1162 * the guest will return to userspace in the course of handling this one
1163 * anyway, so we will get a chance to deliver the rest. */
1166 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1170 int kvm_arch_process_async_events(CPUState *cs)
1172 return cs->halted;
1175 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1177 CPUState *cs = CPU(cpu);
1178 CPUPPCState *env = &cpu->env;
1180 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1181 cs->halted = 1;
1182 cs->exception_index = EXCP_HLT;
1185 return 0;
1188 /* map dcr access to existing qemu dcr emulation */
1189 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1191 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1192 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1194 return 0;
1197 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1199 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1200 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1202 return 0;
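/* Dispatch KVM exit reasons that need PPC-specific handling: DCR reads and
 * writes, halt, PAPR hypercalls, MPIC EPR fetches and watchdog expiry. */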
1205 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1207 PowerPCCPU *cpu = POWERPC_CPU(cs);
1208 CPUPPCState *env = &cpu->env;
1209 int ret;
1211 switch (run->exit_reason) {
1212 case KVM_EXIT_DCR:
1213 if (run->dcr.is_write) {
1214 DPRINTF("handle dcr write\n");
1215 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1216 } else {
1217 DPRINTF("handle dcr read\n");
1218 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1220 break;
1221 case KVM_EXIT_HLT:
1222 DPRINTF("handle halt\n");
1223 ret = kvmppc_handle_halt(cpu);
1224 break;
1225 #if defined(TARGET_PPC64)
1226 case KVM_EXIT_PAPR_HCALL:
1227 DPRINTF("handle PAPR hypercall\n");
1228 run->papr_hcall.ret = spapr_hypercall(cpu,
1229 run->papr_hcall.nr,
1230 run->papr_hcall.args);
1231 ret = 0;
1232 break;
1233 #endif
1234 case KVM_EXIT_EPR:
1235 DPRINTF("handle epr\n");
1236 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1237 ret = 0;
1238 break;
1239 case KVM_EXIT_WATCHDOG:
1240 DPRINTF("handle watchdog expiry\n");
1241 watchdog_perform_action();
1242 ret = 0;
1243 break;
1245 default:
1246 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1247 ret = -1;
1248 break;
1251 return ret;
1254 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1256 CPUState *cs = CPU(cpu);
1257 uint32_t bits = tsr_bits;
1258 struct kvm_one_reg reg = {
1259 .id = KVM_REG_PPC_OR_TSR,
1260 .addr = (uintptr_t) &bits,
1263 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1266 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1269 CPUState *cs = CPU(cpu);
1270 uint32_t bits = tsr_bits;
1271 struct kvm_one_reg reg = {
1272 .id = KVM_REG_PPC_CLEAR_TSR,
1273 .addr = (uintptr_t) &bits,
1276 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1279 int kvmppc_set_tcr(PowerPCCPU *cpu)
1281 CPUState *cs = CPU(cpu);
1282 CPUPPCState *env = &cpu->env;
1283 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1285 struct kvm_one_reg reg = {
1286 .id = KVM_REG_PPC_TCR,
1287 .addr = (uintptr_t) &tcr,
1290 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1293 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1295 CPUState *cs = CPU(cpu);
1296 int ret;
1298 if (!kvm_enabled()) {
1299 return -1;
1302 if (!cap_ppc_watchdog) {
1303 printf("warning: KVM does not support watchdog\n");
1304 return -1;
1307 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1308 if (ret < 0) {
1309 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1310 __func__, strerror(-ret));
1311 return ret;
1314 return ret;
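/* Copy the /proc/cpuinfo line starting with 'field' into 'value'; returns
 * 0 on success, -1 if the field is not present. */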
1317 static int read_cpuinfo(const char *field, char *value, int len)
1319 FILE *f;
1320 int ret = -1;
1321 int field_len = strlen(field);
1322 char line[512];
1324 f = fopen("/proc/cpuinfo", "r");
1325 if (!f) {
1326 return -1;
1329 do {
1330 if (!fgets(line, sizeof(line), f)) {
1331 break;
1333 if (!strncmp(line, field, field_len)) {
1334 pstrcpy(value, len, line);
1335 ret = 0;
1336 break;
1338 } while (*line);
1340 fclose(f);
1342 return ret;
1345 uint32_t kvmppc_get_tbfreq(void)
1347 char line[512];
1348 char *ns;
1349 uint32_t retval = get_ticks_per_sec();
1351 if (read_cpuinfo("timebase", line, sizeof(line))) {
1352 return retval;
1355 if (!(ns = strchr(line, ':'))) {
1356 return retval;
1359 ns++;
1361 retval = atoi(ns);
1362 return retval;
1365 /* Try to find a device tree node for a CPU with a clock-frequency property */
1366 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1368 struct dirent *dirp;
1369 DIR *dp;
1371 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1372 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1373 return -1;
1376 buf[0] = '\0';
1377 while ((dirp = readdir(dp)) != NULL) {
1378 FILE *f;
1379 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1380 dirp->d_name);
1381 f = fopen(buf, "r");
1382 if (f) {
1383 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1384 fclose(f);
1385 break;
1387 buf[0] = '\0';
1389 closedir(dp);
1390 if (buf[0] == '\0') {
1391 printf("Unknown host!\n");
1392 return -1;
1395 return 0;
1398 /* Read a CPU node property from the host device tree that's a single
1399 * integer (32-bit or 64-bit). Returns -1 if the property can't be
1400 * found or opened, and 0 if the integer is not in a format it
1401 * understands */
1402 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1404 char buf[PATH_MAX];
1405 union {
1406 uint32_t v32;
1407 uint64_t v64;
1408 } u;
1409 FILE *f;
1410 int len;
1412 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1413 return -1;
1416 strncat(buf, "/", sizeof(buf) - strlen(buf));
1417 strncat(buf, propname, sizeof(buf) - strlen(buf));
1419 f = fopen(buf, "rb");
1420 if (!f) {
1421 return -1;
1424 len = fread(&u, 1, sizeof(u), f);
1425 fclose(f);
1426 switch (len) {
1427 case 4:
1428 /* property is a 32-bit quantity */
1429 return be32_to_cpu(u.v32);
1430 case 8:
1431 return be64_to_cpu(u.v64);
1434 return 0;
1437 uint64_t kvmppc_get_clockfreq(void)
1439 return kvmppc_read_int_cpu_dt("clock-frequency");
1442 uint32_t kvmppc_get_vmx(void)
1444 return kvmppc_read_int_cpu_dt("ibm,vmx");
1447 uint32_t kvmppc_get_dfp(void)
1449 return kvmppc_read_int_cpu_dt("ibm,dfp");
1452 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1454 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1455 CPUState *cs = CPU(cpu);
1457 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1458 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1459 return 0;
1462 return 1;
1465 int kvmppc_get_hasidle(CPUPPCState *env)
1467 struct kvm_ppc_pvinfo pvinfo;
1469 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1470 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1471 return 1;
1474 return 0;
1477 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1479 uint32_t *hc = (uint32_t*)buf;
1480 struct kvm_ppc_pvinfo pvinfo;
1482 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1483 memcpy(buf, pvinfo.hcall, buf_len);
1484 return 0;
1488 * Fallback to always fail hypercalls:
1490 * li r3, -1
1491 * nop
1492 * nop
1493 * nop
1496 hc[0] = 0x3860ffff;
1497 hc[1] = 0x60000000;
1498 hc[2] = 0x60000000;
1499 hc[3] = 0x60000000;
1501 return 0;
1504 void kvmppc_set_papr(PowerPCCPU *cpu)
1506 CPUState *cs = CPU(cpu);
1507 int ret;
1509 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1510 if (ret) {
1511 cpu_abort(cs, "This KVM version does not support PAPR\n");
1514 /* Update the capability flag so we sync the right information
1515 * with kvm */
1516 cap_papr = 1;
1519 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1521 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1524 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1526 CPUState *cs = CPU(cpu);
1527 int ret;
1529 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1530 if (ret && mpic_proxy) {
1531 cpu_abort(cs, "This KVM version does not support EPR\n");
1535 int kvmppc_smt_threads(void)
1537 return cap_ppc_smt ? cap_ppc_smt : 1;
1540 #ifdef TARGET_PPC64
1541 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1543 void *rma;
1544 off_t size;
1545 int fd;
1546 struct kvm_allocate_rma ret;
1547 MemoryRegion *rma_region;
1549 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1550 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1551 * not necessary on this hardware
1552 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1554 * FIXME: We should allow the user to force contiguous RMA
1555 * allocation in the cap_ppc_rma==1 case.
1557 if (cap_ppc_rma < 2) {
1558 return 0;
1561 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1562 if (fd < 0) {
1563 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1564 strerror(errno));
1565 return -1;
1568 size = MIN(ret.rma_size, 256ul << 20);
1570 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1571 if (rma == MAP_FAILED) {
1572 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1573 return -1;
1576 rma_region = g_new(MemoryRegion, 1);
1577 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1578 vmstate_register_ram_global(rma_region);
1579 memory_region_add_subregion(sysmem, 0, rma_region);
1581 return size;
1584 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1586 struct kvm_ppc_smmu_info info;
1587 long rampagesize, best_page_shift;
1588 int i;
1590 if (cap_ppc_rma >= 2) {
1591 return current_size;
1594 /* Find the largest hardware supported page size that's less than
1595 * or equal to the (logical) backing page size of guest RAM */
1596 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1597 rampagesize = getrampagesize();
1598 best_page_shift = 0;
1600 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1601 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1603 if (!sps->page_shift) {
1604 continue;
1607 if ((sps->page_shift > best_page_shift)
1608 && ((1UL << sps->page_shift) <= rampagesize)) {
1609 best_page_shift = sps->page_shift;
1613 return MIN(current_size,
1614 1ULL << (best_page_shift + hash_shift - 7));
1616 #endif
1618 bool kvmppc_spapr_use_multitce(void)
1620 return cap_spapr_multitce;
1623 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1625 struct kvm_create_spapr_tce args = {
1626 .liobn = liobn,
1627 .window_size = window_size,
1629 long len;
1630 int fd;
1631 void *table;
1633 /* Must set fd to -1 so we don't try to munmap when called for
1634 * destroying the table, which the upper layers -will- do
1636 *pfd = -1;
1637 if (!cap_spapr_tce) {
1638 return NULL;
1641 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1642 if (fd < 0) {
1643 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1644 liobn);
1645 return NULL;
1648 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1649 /* FIXME: round this up to page size */
1651 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1652 if (table == MAP_FAILED) {
1653 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1654 liobn);
1655 close(fd);
1656 return NULL;
1659 *pfd = fd;
1660 return table;
1663 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
1665 long len;
1667 if (fd < 0) {
1668 return -1;
1671 len = nb_table * sizeof(uint64_t);
1672 if ((munmap(table, len) < 0) ||
1673 (close(fd) < 0)) {
1674 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1675 strerror(errno));
1676 /* Leak the table */
1679 return 0;
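/* Ask the kernel to allocate (or reset) the guest hash page table. Returns
 * the log2 of the size actually allocated, 0 if QEMU must allocate the HTAB
 * itself (TCG or PR KVM), or a negative errno on failure. */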
1682 int kvmppc_reset_htab(int shift_hint)
1684 uint32_t shift = shift_hint;
1686 if (!kvm_enabled()) {
1687 /* Full emulation, tell caller to allocate htab itself */
1688 return 0;
1690 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1691 int ret;
1692 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1693 if (ret == -ENOTTY) {
1694 /* At least some versions of PR KVM advertise the
1695 * capability, but don't implement the ioctl(). Oops.
1696 * Return 0 so that we allocate the htab in qemu, as is
1697 * correct for PR. */
1698 return 0;
1699 } else if (ret < 0) {
1700 return ret;
1702 return shift;
1705 /* We have a kernel that predates the htab reset calls. For PR
1706 * KVM, we need to allocate the htab ourselves; for an HV KVM of
1707 * this era, a fixed-size 16MB hash table has been allocated
1708 * already. Kernels of this era have the GET_PVINFO capability
1709 * only on PR, so we use this hack to determine the right
1710 * answer */
1711 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1712 /* PR - tell caller to allocate htab */
1713 return 0;
1714 } else {
1715 /* HV - assume 16MB kernel allocated htab */
1716 return 24;
1720 static inline uint32_t mfpvr(void)
1722 uint32_t pvr;
1724 asm ("mfpvr %0"
1725 : "=r"(pvr));
1726 return pvr;
1729 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1731 if (on) {
1732 *word |= flags;
1733 } else {
1734 *word &= ~flags;
1738 static void kvmppc_host_cpu_initfn(Object *obj)
1740 assert(kvm_enabled());
1743 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1745 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1746 uint32_t vmx = kvmppc_get_vmx();
1747 uint32_t dfp = kvmppc_get_dfp();
1748 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1749 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1751 /* Now fix up the class with information we can query from the host */
1752 pcc->pvr = mfpvr();
1754 if (vmx != -1) {
1755 /* Only override when we know what the host supports */
1756 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1757 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1759 if (dfp != -1) {
1760 /* Only override when we know what the host supports */
1761 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1764 if (dcache_size != -1) {
1765 pcc->l1_dcache_size = dcache_size;
1768 if (icache_size != -1) {
1769 pcc->l1_icache_size = icache_size;
1773 bool kvmppc_has_cap_epr(void)
1775 return cap_epr;
1778 bool kvmppc_has_cap_htab_fd(void)
1780 return cap_htab_fd;
1783 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
1785 ObjectClass *oc = OBJECT_CLASS(pcc);
1787 while (oc && !object_class_is_abstract(oc)) {
1788 oc = object_class_get_parent(oc);
1790 assert(oc);
1792 return POWERPC_CPU_CLASS(oc);
1795 static int kvm_ppc_register_host_cpu_type(void)
1797 TypeInfo type_info = {
1798 .name = TYPE_HOST_POWERPC_CPU,
1799 .instance_init = kvmppc_host_cpu_initfn,
1800 .class_init = kvmppc_host_cpu_class_init,
1802 uint32_t host_pvr = mfpvr();
1803 PowerPCCPUClass *pvr_pcc;
1804 DeviceClass *dc;
1806 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1807 if (pvr_pcc == NULL) {
1808 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1810 if (pvr_pcc == NULL) {
1811 return -1;
1813 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1814 type_register(&type_info);
1816 /* Register a generic CPU class for the whole family */
1817 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
1818 dc = DEVICE_CLASS(pvr_pcc);
1819 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1820 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
1821 type_register(&type_info);
1823 return 0;
1826 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1828 struct kvm_rtas_token_args args = {
1829 .token = token,
1832 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1833 return -ENOENT;
1836 strncpy(args.name, function, sizeof(args.name));
1838 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1841 int kvmppc_get_htab_fd(bool write)
1843 struct kvm_get_htab_fd s = {
1844 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1845 .start_index = 0,
1848 if (!cap_htab_fd) {
1849 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1850 return -1;
1853 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
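/* Stream hash table entries from the KVM HTAB fd into the migration stream,
 * stopping at EOF or after roughly max_ns nanoseconds. Returns 1 once the
 * whole table has been read, 0 if more remains, negative on read errors. */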
1856 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1858 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1859 uint8_t buf[bufsize];
1860 ssize_t rc;
1862 do {
1863 rc = read(fd, buf, bufsize);
1864 if (rc < 0) {
1865 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1866 strerror(errno));
1867 return rc;
1868 } else if (rc) {
1869 /* Kernel already returns data in BE format for the file */
1870 qemu_put_buffer(f, buf, rc);
1872 } while ((rc != 0)
1873 && ((max_ns < 0)
1874 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1876 return (rc == 0) ? 1 : 0;
1879 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1880 uint16_t n_valid, uint16_t n_invalid)
1882 struct kvm_get_htab_header *buf;
1883 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1884 ssize_t rc;
1886 buf = alloca(chunksize);
1887 /* This is KVM on ppc, so this is all big-endian */
1888 buf->index = index;
1889 buf->n_valid = n_valid;
1890 buf->n_invalid = n_invalid;
1892 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1894 rc = write(fd, buf, chunksize);
1895 if (rc < 0) {
1896 fprintf(stderr, "Error writing KVM hash table: %s\n",
1897 strerror(errno));
1898 return rc;
1900 if (rc != chunksize) {
1901 /* We should never get a short write on a single chunk */
1902 fprintf(stderr, "Short write, restoring KVM hash table\n");
1903 return -1;
1905 return 0;
1908 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1910 return true;
1913 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1915 return 1;
1918 int kvm_arch_on_sigbus(int code, void *addr)
1920 return 1;
1923 void kvm_arch_init_irq_routing(KVMState *s)
1927 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1929 return -EINVAL;
1932 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1934 return -EINVAL;
1937 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1939 return -EINVAL;
1942 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1944 return -EINVAL;
1947 void kvm_arch_remove_all_hw_breakpoints(void)
1951 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1955 struct kvm_get_htab_buf {
1956 struct kvm_get_htab_header header;
1958 * We require one extra entry for the read
1960 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
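/* Read one HPTE group from KVM through a temporary HTAB fd. Returns a token
 * (a pointer into a heap-allocated kvm_get_htab_buf) that the caller releases
 * with kvmppc_hash64_free_pteg(), or 0 on failure. */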
1963 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1965 int htab_fd;
1966 struct kvm_get_htab_fd ghf;
1967 struct kvm_get_htab_buf *hpte_buf;
1969 ghf.flags = 0;
1970 ghf.start_index = pte_index;
1971 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1972 if (htab_fd < 0) {
1973 goto error_out;
1976 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1978 * Read the hpte group
1980 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1981 goto out_close;
1984 close(htab_fd);
1985 return (uint64_t)(uintptr_t) hpte_buf->hpte;
1987 out_close:
1988 g_free(hpte_buf);
1989 close(htab_fd);
1990 error_out:
1991 return 0;
1994 void kvmppc_hash64_free_pteg(uint64_t token)
1996 struct kvm_get_htab_buf *htab_buf;
1998 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1999 hpte);
2000 g_free(htab_buf);
2001 return;
2004 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2005 target_ulong pte0, target_ulong pte1)
2007 int htab_fd;
2008 struct kvm_get_htab_fd ghf;
2009 struct kvm_get_htab_buf hpte_buf;
2011 ghf.flags = 0;
2012 ghf.start_index = 0; /* Ignored */
2013 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2014 if (htab_fd < 0) {
2015 goto error_out;
2018 hpte_buf.header.n_valid = 1;
2019 hpte_buf.header.n_invalid = 0;
2020 hpte_buf.header.index = pte_index;
2021 hpte_buf.hpte[0] = pte0;
2022 hpte_buf.hpte[1] = pte1;
2024 * Write the hpte entry.
2025 * CAUTION: write() has the warn_unused_result attribute. Hence we
2026 * need to check the return value, even though we do nothing.
2028 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2029 goto out_close;
2032 out_close:
2033 close(htab_fd);
2034 return;
2036 error_out:
2037 return;