target-ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
41 #include "exec/gdbstub.h"
42 #include "exec/memattrs.h"
44 //#define DEBUG_KVM
46 #ifdef DEBUG_KVM
47 #define DPRINTF(fmt, ...) \
48 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
49 #else
50 #define DPRINTF(fmt, ...) \
51 do { } while (0)
52 #endif
54 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
56 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
57 KVM_CAP_LAST_INFO
60 static int cap_interrupt_unset = false;
61 static int cap_interrupt_level = false;
62 static int cap_segstate;
63 static int cap_booke_sregs;
64 static int cap_ppc_smt;
65 static int cap_ppc_rma;
66 static int cap_spapr_tce;
67 static int cap_spapr_multitce;
68 static int cap_spapr_vfio;
69 static int cap_hior;
70 static int cap_one_reg;
71 static int cap_epr;
72 static int cap_ppc_watchdog;
73 static int cap_papr;
74 static int cap_htab_fd;
75 static int cap_fixup_hcalls;
77 static uint32_t debug_inst_opcode;
79 /* XXX We have a race condition where we actually have a level triggered
80 * interrupt, but the infrastructure can't expose that yet, so the guest
81 * takes but ignores it, goes to sleep and never gets notified that there's
82 * still an interrupt pending.
84 * As a quick workaround, let's just wake up again 20 ms after we injected
85 * an interrupt. That way we can assure that we're always reinjecting
86 * interrupts in case the guest swallowed them.
88 static QEMUTimer *idle_timer;
90 static void kvm_kick_cpu(void *opaque)
92 PowerPCCPU *cpu = opaque;
94 qemu_cpu_kick(CPU(cpu));
97 static int kvm_ppc_register_host_cpu_type(void);
99 int kvm_arch_init(MachineState *ms, KVMState *s)
101 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
102 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
103 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
104 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
105 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
106 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
107 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
108 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
109 cap_spapr_vfio = false;
110 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
111 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
112 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
113 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
114 /* Note: we don't set cap_papr here, because this capability is
115 * only activated after this by kvmppc_set_papr() */
116 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
117 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
119 if (!cap_interrupt_level) {
120 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
121 "VM to stall at times!\n");
124 kvm_ppc_register_host_cpu_type();
126 return 0;
129 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
131 CPUPPCState *cenv = &cpu->env;
132 CPUState *cs = CPU(cpu);
133 struct kvm_sregs sregs;
134 int ret;
136 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
137 /* What we're really trying to say is "if we're on BookE, we use
138 the native PVR for now". This is the only sane way to check
139 it though, so we potentially confuse users into thinking they can
140 run BookE guests on BookS. Let's hope nobody dares try :) */
141 return 0;
142 } else {
143 if (!cap_segstate) {
144 fprintf(stderr, "kvm error: missing PVR setting capability\n");
145 return -ENOSYS;
149 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
150 if (ret) {
151 return ret;
154 sregs.pvr = cenv->spr[SPR_PVR];
155 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
158 /* Set up a shared TLB array with KVM */
159 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
161 CPUPPCState *env = &cpu->env;
162 CPUState *cs = CPU(cpu);
163 struct kvm_book3e_206_tlb_params params = {};
164 struct kvm_config_tlb cfg = {};
165 unsigned int entries = 0;
166 int ret, i;
168 if (!kvm_enabled() ||
169 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
170 return 0;
173 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
175 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
176 params.tlb_sizes[i] = booke206_tlb_size(env, i);
177 params.tlb_ways[i] = booke206_tlb_ways(env, i);
178 entries += params.tlb_sizes[i];
181 assert(entries == env->nb_tlb);
182 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
184 env->tlb_dirty = true;
186 cfg.array = (uintptr_t)env->tlb.tlbm;
187 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
188 cfg.params = (uintptr_t)&params;
189 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
191 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
192 if (ret < 0) {
193 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
194 __func__, strerror(-ret));
195 return ret;
198 env->kvm_sw_tlb = true;
199 return 0;
203 #if defined(TARGET_PPC64)
204 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
205 struct kvm_ppc_smmu_info *info)
207 CPUPPCState *env = &cpu->env;
208 CPUState *cs = CPU(cpu);
210 memset(info, 0, sizeof(*info));
212 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
213 * need to "guess" what the supported page sizes are.
215 * For that to work we make a few assumptions:
217 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
218 * KVM which only supports 4K and 16M pages, but supports them
219 * regardless of the backing store characteristics. We also don't
220 * support 1T segments.
222 * This is safe because if HV KVM ever supports that capability or PR
223 * KVM grows support for more page/segment sizes, those versions
224 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
225 * will not hit this fallback.
227 * - Else we are running HV KVM. This means we only support page
228 * sizes that fit in the backing store. Additionally we only
229 * advertise 64K pages if the processor is ARCH 2.06 and we assume
230 * P7 encodings for the SLB and hash table. Here too, we assume
231 * support for any newer processor will mean a kernel that
232 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
233 * this fallback.
235 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
236 /* No flags */
237 info->flags = 0;
238 info->slb_size = 64;
240 /* Standard 4k base page size segment */
241 info->sps[0].page_shift = 12;
242 info->sps[0].slb_enc = 0;
243 info->sps[0].enc[0].page_shift = 12;
244 info->sps[0].enc[0].pte_enc = 0;
246 /* Standard 16M large page size segment */
247 info->sps[1].page_shift = 24;
248 info->sps[1].slb_enc = SLB_VSID_L;
249 info->sps[1].enc[0].page_shift = 24;
250 info->sps[1].enc[0].pte_enc = 0;
251 } else {
252 int i = 0;
254 /* HV KVM has backing store size restrictions */
255 info->flags = KVM_PPC_PAGE_SIZES_REAL;
257 if (env->mmu_model & POWERPC_MMU_1TSEG) {
258 info->flags |= KVM_PPC_1T_SEGMENTS;
261 if (env->mmu_model == POWERPC_MMU_2_06) {
262 info->slb_size = 32;
263 } else {
264 info->slb_size = 64;
267 /* Standard 4k base page size segment */
268 info->sps[i].page_shift = 12;
269 info->sps[i].slb_enc = 0;
270 info->sps[i].enc[0].page_shift = 12;
271 info->sps[i].enc[0].pte_enc = 0;
272 i++;
274 /* 64K on MMU 2.06 */
275 if (env->mmu_model == POWERPC_MMU_2_06) {
276 info->sps[i].page_shift = 16;
277 info->sps[i].slb_enc = 0x110;
278 info->sps[i].enc[0].page_shift = 16;
279 info->sps[i].enc[0].pte_enc = 1;
280 i++;
283 /* Standard 16M large page size segment */
284 info->sps[i].page_shift = 24;
285 info->sps[i].slb_enc = SLB_VSID_L;
286 info->sps[i].enc[0].page_shift = 24;
287 info->sps[i].enc[0].pte_enc = 0;
291 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
293 CPUState *cs = CPU(cpu);
294 int ret;
296 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
297 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
298 if (ret == 0) {
299 return;
303 kvm_get_fallback_smmu_info(cpu, info);
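/* Return the page size backing guest RAM: the huge page size when mem_path points at a hugetlbfs mount, the normal host page size otherwise */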
306 static long getrampagesize(void)
308 struct statfs fs;
309 int ret;
311 if (!mem_path) {
312 /* guest RAM is backed by normal anonymous pages */
313 return getpagesize();
316 do {
317 ret = statfs(mem_path, &fs);
318 } while (ret != 0 && errno == EINTR);
320 if (ret != 0) {
321 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
322 strerror(errno));
323 exit(1);
326 #define HUGETLBFS_MAGIC 0x958458f6
328 if (fs.f_type != HUGETLBFS_MAGIC) {
329 /* Explicit mempath, but it's ordinary pages */
330 return getpagesize();
333 /* It's a hugetlbfs mount, return the huge page size */
334 return fs.f_bsize;
337 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
339 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
340 return true;
343 return (1ul << shift) <= rampgsize;
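/* Filter the CPU's advertised segment/page sizes down to what KVM and the RAM backing page size actually allow, and fix up SLB size and 1T-segment support */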
346 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
348 static struct kvm_ppc_smmu_info smmu_info;
349 static bool has_smmu_info;
350 CPUPPCState *env = &cpu->env;
351 long rampagesize;
352 int iq, ik, jq, jk;
354 /* We only handle page sizes for 64-bit server guests for now */
355 if (!(env->mmu_model & POWERPC_MMU_64)) {
356 return;
359 /* Collect MMU info from kernel if not already */
360 if (!has_smmu_info) {
361 kvm_get_smmu_info(cpu, &smmu_info);
362 has_smmu_info = true;
365 rampagesize = getrampagesize();
367 /* Convert to QEMU form */
368 memset(&env->sps, 0, sizeof(env->sps));
371 * XXX This loop should be an entry wide AND of the capabilities that
372 * the selected CPU has with the capabilities that KVM supports.
374 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
375 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
376 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
378 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
379 ksps->page_shift)) {
380 continue;
382 qsps->page_shift = ksps->page_shift;
383 qsps->slb_enc = ksps->slb_enc;
384 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
385 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
386 ksps->enc[jk].page_shift)) {
387 continue;
389 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
390 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
391 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
392 break;
395 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
396 break;
399 env->slb_nr = smmu_info.slb_size;
400 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
401 env->mmu_model &= ~POWERPC_MMU_1TSEG;
404 #else /* defined (TARGET_PPC64) */
406 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
410 #endif /* !defined (TARGET_PPC64) */
412 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
414 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
417 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
418 * book3s supports only 1 h/w watchpoint, so an array size
419 * of 4 is sufficient for now.
421 #define MAX_HW_BKPTS 4
423 static struct HWBreakpoint {
424 target_ulong addr;
425 int type;
426 } hw_debug_points[MAX_HW_BKPTS];
428 static CPUWatchpoint hw_watchpoint;
430 /* By default no breakpoints or watchpoints are supported */
431 static int max_hw_breakpoint;
432 static int max_hw_watchpoint;
433 static int nb_hw_breakpoint;
434 static int nb_hw_watchpoint;
436 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
438 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
439 max_hw_breakpoint = 2;
440 max_hw_watchpoint = 2;
443 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
444 fprintf(stderr, "Error initializing h/w breakpoints\n");
445 return;
449 int kvm_arch_init_vcpu(CPUState *cs)
451 PowerPCCPU *cpu = POWERPC_CPU(cs);
452 CPUPPCState *cenv = &cpu->env;
453 int ret;
455 /* Gather server mmu info from KVM and update the CPU state */
456 kvm_fixup_page_sizes(cpu);
458 /* Synchronize sregs with kvm */
459 ret = kvm_arch_sync_sregs(cpu);
460 if (ret) {
461 return ret;
464 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
466 /* Some targets support access to KVM's guest TLB. */
467 switch (cenv->mmu_model) {
468 case POWERPC_MMU_BOOKE206:
469 ret = kvm_booke206_tlb_init(cpu);
470 break;
471 default:
472 break;
475 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
476 kvmppc_hw_debug_points_init(cenv);
478 return ret;
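/* Push the whole QEMU-side shadow TLB to KVM by marking every entry dirty */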
481 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
483 CPUPPCState *env = &cpu->env;
484 CPUState *cs = CPU(cpu);
485 struct kvm_dirty_tlb dirty_tlb;
486 unsigned char *bitmap;
487 int ret;
489 if (!env->kvm_sw_tlb) {
490 return;
493 bitmap = g_malloc((env->nb_tlb + 7) / 8);
494 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
496 dirty_tlb.bitmap = (uintptr_t)bitmap;
497 dirty_tlb.num_dirty = env->nb_tlb;
499 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
500 if (ret) {
501 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
502 __func__, strerror(-ret));
505 g_free(bitmap);
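/* Read one SPR from KVM via the ONE_REG interface into env->spr[] */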
508 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
510 PowerPCCPU *cpu = POWERPC_CPU(cs);
511 CPUPPCState *env = &cpu->env;
512 union {
513 uint32_t u32;
514 uint64_t u64;
515 } val;
516 struct kvm_one_reg reg = {
517 .id = id,
518 .addr = (uintptr_t) &val,
520 int ret;
522 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
523 if (ret != 0) {
524 trace_kvm_failed_spr_get(spr, strerror(errno));
525 } else {
526 switch (id & KVM_REG_SIZE_MASK) {
527 case KVM_REG_SIZE_U32:
528 env->spr[spr] = val.u32;
529 break;
531 case KVM_REG_SIZE_U64:
532 env->spr[spr] = val.u64;
533 break;
535 default:
536 /* Don't handle this size yet */
537 abort();
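/* Write one SPR from env->spr[] to KVM via the ONE_REG interface */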
542 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
544 PowerPCCPU *cpu = POWERPC_CPU(cs);
545 CPUPPCState *env = &cpu->env;
546 union {
547 uint32_t u32;
548 uint64_t u64;
549 } val;
550 struct kvm_one_reg reg = {
551 .id = id,
552 .addr = (uintptr_t) &val,
554 int ret;
556 switch (id & KVM_REG_SIZE_MASK) {
557 case KVM_REG_SIZE_U32:
558 val.u32 = env->spr[spr];
559 break;
561 case KVM_REG_SIZE_U64:
562 val.u64 = env->spr[spr];
563 break;
565 default:
566 /* Don't handle this size yet */
567 abort();
570 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
571 if (ret != 0) {
572 trace_kvm_failed_spr_set(spr, strerror(errno));
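/* Upload FPSCR and the FP/VSX registers, plus VSCR and the Altivec registers when present, to KVM */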
576 static int kvm_put_fp(CPUState *cs)
578 PowerPCCPU *cpu = POWERPC_CPU(cs);
579 CPUPPCState *env = &cpu->env;
580 struct kvm_one_reg reg;
581 int i;
582 int ret;
584 if (env->insns_flags & PPC_FLOAT) {
585 uint64_t fpscr = env->fpscr;
586 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
588 reg.id = KVM_REG_PPC_FPSCR;
589 reg.addr = (uintptr_t)&fpscr;
590 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
591 if (ret < 0) {
592 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
593 return ret;
596 for (i = 0; i < 32; i++) {
597 uint64_t vsr[2];
599 vsr[0] = float64_val(env->fpr[i]);
600 vsr[1] = env->vsr[i];
601 reg.addr = (uintptr_t) &vsr;
602 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
604 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
605 if (ret < 0) {
606 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
607 i, strerror(errno));
608 return ret;
613 if (env->insns_flags & PPC_ALTIVEC) {
614 reg.id = KVM_REG_PPC_VSCR;
615 reg.addr = (uintptr_t)&env->vscr;
616 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
617 if (ret < 0) {
618 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
619 return ret;
622 for (i = 0; i < 32; i++) {
623 reg.id = KVM_REG_PPC_VR(i);
624 reg.addr = (uintptr_t)&env->avr[i];
625 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
626 if (ret < 0) {
627 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
628 return ret;
633 return 0;
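/* Fetch FPSCR and the FP/VSX registers, plus VSCR and the Altivec registers when present, from KVM */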
636 static int kvm_get_fp(CPUState *cs)
638 PowerPCCPU *cpu = POWERPC_CPU(cs);
639 CPUPPCState *env = &cpu->env;
640 struct kvm_one_reg reg;
641 int i;
642 int ret;
644 if (env->insns_flags & PPC_FLOAT) {
645 uint64_t fpscr;
646 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
648 reg.id = KVM_REG_PPC_FPSCR;
649 reg.addr = (uintptr_t)&fpscr;
650 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
651 if (ret < 0) {
652 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
653 return ret;
654 } else {
655 env->fpscr = fpscr;
658 for (i = 0; i < 32; i++) {
659 uint64_t vsr[2];
661 reg.addr = (uintptr_t) &vsr;
662 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
664 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
665 if (ret < 0) {
666 DPRINTF("Unable to get %s%d from KVM: %s\n",
667 vsx ? "VSR" : "FPR", i, strerror(errno));
668 return ret;
669 } else {
670 env->fpr[i] = vsr[0];
671 if (vsx) {
672 env->vsr[i] = vsr[1];
678 if (env->insns_flags & PPC_ALTIVEC) {
679 reg.id = KVM_REG_PPC_VSCR;
680 reg.addr = (uintptr_t)&env->vscr;
681 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
682 if (ret < 0) {
683 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
684 return ret;
687 for (i = 0; i < 32; i++) {
688 reg.id = KVM_REG_PPC_VR(i);
689 reg.addr = (uintptr_t)&env->avr[i];
690 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
691 if (ret < 0) {
692 DPRINTF("Unable to get VR%d from KVM: %s\n",
693 i, strerror(errno));
694 return ret;
699 return 0;
702 #if defined(TARGET_PPC64)
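/* Read the VPA, SLB shadow and dispatch trace log registration state from KVM */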
703 static int kvm_get_vpa(CPUState *cs)
705 PowerPCCPU *cpu = POWERPC_CPU(cs);
706 CPUPPCState *env = &cpu->env;
707 struct kvm_one_reg reg;
708 int ret;
710 reg.id = KVM_REG_PPC_VPA_ADDR;
711 reg.addr = (uintptr_t)&env->vpa_addr;
712 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
713 if (ret < 0) {
714 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
715 return ret;
718 assert((uintptr_t)&env->slb_shadow_size
719 == ((uintptr_t)&env->slb_shadow_addr + 8));
720 reg.id = KVM_REG_PPC_VPA_SLB;
721 reg.addr = (uintptr_t)&env->slb_shadow_addr;
722 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
723 if (ret < 0) {
724 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
725 strerror(errno));
726 return ret;
729 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
730 reg.id = KVM_REG_PPC_VPA_DTL;
731 reg.addr = (uintptr_t)&env->dtl_addr;
732 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
733 if (ret < 0) {
734 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
735 strerror(errno));
736 return ret;
739 return 0;
742 static int kvm_put_vpa(CPUState *cs)
744 PowerPCCPU *cpu = POWERPC_CPU(cs);
745 CPUPPCState *env = &cpu->env;
746 struct kvm_one_reg reg;
747 int ret;
749 /* SLB shadow or DTL can't be registered unless a master VPA is
750 * registered. That means when restoring state, if a VPA *is*
751 * registered, we need to set that up first. If not, we need to
752 * deregister the others before deregistering the master VPA */
753 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
755 if (env->vpa_addr) {
756 reg.id = KVM_REG_PPC_VPA_ADDR;
757 reg.addr = (uintptr_t)&env->vpa_addr;
758 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
759 if (ret < 0) {
760 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
761 return ret;
765 assert((uintptr_t)&env->slb_shadow_size
766 == ((uintptr_t)&env->slb_shadow_addr + 8));
767 reg.id = KVM_REG_PPC_VPA_SLB;
768 reg.addr = (uintptr_t)&env->slb_shadow_addr;
769 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
770 if (ret < 0) {
771 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
772 return ret;
775 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
776 reg.id = KVM_REG_PPC_VPA_DTL;
777 reg.addr = (uintptr_t)&env->dtl_addr;
778 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
779 if (ret < 0) {
780 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
781 strerror(errno));
782 return ret;
785 if (!env->vpa_addr) {
786 reg.id = KVM_REG_PPC_VPA_ADDR;
787 reg.addr = (uintptr_t)&env->vpa_addr;
788 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
789 if (ret < 0) {
790 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
791 return ret;
795 return 0;
797 #endif /* TARGET_PPC64 */
799 int kvm_arch_put_registers(CPUState *cs, int level)
801 PowerPCCPU *cpu = POWERPC_CPU(cs);
802 CPUPPCState *env = &cpu->env;
803 struct kvm_regs regs;
804 int ret;
805 int i;
807 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
808 if (ret < 0) {
809 return ret;
812 regs.ctr = env->ctr;
813 regs.lr = env->lr;
814 regs.xer = cpu_read_xer(env);
815 regs.msr = env->msr;
816 regs.pc = env->nip;
818 regs.srr0 = env->spr[SPR_SRR0];
819 regs.srr1 = env->spr[SPR_SRR1];
821 regs.sprg0 = env->spr[SPR_SPRG0];
822 regs.sprg1 = env->spr[SPR_SPRG1];
823 regs.sprg2 = env->spr[SPR_SPRG2];
824 regs.sprg3 = env->spr[SPR_SPRG3];
825 regs.sprg4 = env->spr[SPR_SPRG4];
826 regs.sprg5 = env->spr[SPR_SPRG5];
827 regs.sprg6 = env->spr[SPR_SPRG6];
828 regs.sprg7 = env->spr[SPR_SPRG7];
830 regs.pid = env->spr[SPR_BOOKE_PID];
832 for (i = 0;i < 32; i++)
833 regs.gpr[i] = env->gpr[i];
835 regs.cr = 0;
836 for (i = 0; i < 8; i++) {
837 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
840 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
841 if (ret < 0)
842 return ret;
844 kvm_put_fp(cs);
846 if (env->tlb_dirty) {
847 kvm_sw_tlb_put(cpu);
848 env->tlb_dirty = false;
851 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
852 struct kvm_sregs sregs;
854 sregs.pvr = env->spr[SPR_PVR];
856 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
858 /* Sync SLB */
859 #ifdef TARGET_PPC64
860 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
861 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
862 if (env->slb[i].esid & SLB_ESID_V) {
863 sregs.u.s.ppc64.slb[i].slbe |= i;
865 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
867 #endif
869 /* Sync SRs */
870 for (i = 0; i < 16; i++) {
871 sregs.u.s.ppc32.sr[i] = env->sr[i];
874 /* Sync BATs */
875 for (i = 0; i < 8; i++) {
876 /* Beware. We have to swap upper and lower bits here */
877 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
878 | env->DBAT[1][i];
879 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
880 | env->IBAT[1][i];
883 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
884 if (ret) {
885 return ret;
889 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
890 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
893 if (cap_one_reg) {
894 int i;
896 /* We deliberately ignore errors here: for kernels which have
897 * the ONE_REG calls but don't support the specific
898 * registers, there's a reasonable chance things will still
899 * work, at least until we try to migrate. */
900 for (i = 0; i < 1024; i++) {
901 uint64_t id = env->spr_cb[i].one_reg_id;
903 if (id != 0) {
904 kvm_put_one_spr(cs, id, i);
908 #ifdef TARGET_PPC64
909 if (msr_ts) {
910 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
911 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
913 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
914 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
916 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
917 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
918 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
919 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
920 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
921 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
922 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
923 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
924 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
925 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
928 if (cap_papr) {
929 if (kvm_put_vpa(cs) < 0) {
930 DPRINTF("Warning: Unable to set VPA information to KVM\n");
934 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
935 #endif /* TARGET_PPC64 */
938 return ret;
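/* Recompute a BookE exception vector address as the IVOR offset plus the IVPR base */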
941 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
943 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
946 int kvm_arch_get_registers(CPUState *cs)
948 PowerPCCPU *cpu = POWERPC_CPU(cs);
949 CPUPPCState *env = &cpu->env;
950 struct kvm_regs regs;
951 struct kvm_sregs sregs;
952 uint32_t cr;
953 int i, ret;
955 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
956 if (ret < 0)
957 return ret;
959 cr = regs.cr;
960 for (i = 7; i >= 0; i--) {
961 env->crf[i] = cr & 15;
962 cr >>= 4;
965 env->ctr = regs.ctr;
966 env->lr = regs.lr;
967 cpu_write_xer(env, regs.xer);
968 env->msr = regs.msr;
969 env->nip = regs.pc;
971 env->spr[SPR_SRR0] = regs.srr0;
972 env->spr[SPR_SRR1] = regs.srr1;
974 env->spr[SPR_SPRG0] = regs.sprg0;
975 env->spr[SPR_SPRG1] = regs.sprg1;
976 env->spr[SPR_SPRG2] = regs.sprg2;
977 env->spr[SPR_SPRG3] = regs.sprg3;
978 env->spr[SPR_SPRG4] = regs.sprg4;
979 env->spr[SPR_SPRG5] = regs.sprg5;
980 env->spr[SPR_SPRG6] = regs.sprg6;
981 env->spr[SPR_SPRG7] = regs.sprg7;
983 env->spr[SPR_BOOKE_PID] = regs.pid;
985 for (i = 0;i < 32; i++)
986 env->gpr[i] = regs.gpr[i];
988 kvm_get_fp(cs);
990 if (cap_booke_sregs) {
991 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
992 if (ret < 0) {
993 return ret;
996 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
997 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
998 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
999 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1000 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1001 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1002 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1003 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1004 env->spr[SPR_DECR] = sregs.u.e.dec;
1005 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1006 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1007 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1010 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1011 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1012 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1013 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1014 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1015 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1018 if (sregs.u.e.features & KVM_SREGS_E_64) {
1019 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1022 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1023 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1026 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1027 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1028 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1029 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1030 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1031 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1032 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1033 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1034 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1035 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1036 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1037 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1038 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1039 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1040 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1041 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1042 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1043 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1044 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1045 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1046 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1047 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1048 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1049 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1050 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1051 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1052 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1053 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1054 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1055 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1056 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1057 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1058 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1060 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1061 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1062 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1063 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1064 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1065 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1066 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1069 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1070 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1071 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1074 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1075 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1076 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1077 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1078 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1082 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1083 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1084 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1085 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1086 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1087 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1088 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1089 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1090 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1091 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1092 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1095 if (sregs.u.e.features & KVM_SREGS_EXP) {
1096 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1099 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1100 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1101 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1104 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1105 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1106 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1107 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1109 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1110 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1111 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1116 if (cap_segstate) {
1117 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1118 if (ret < 0) {
1119 return ret;
1122 if (!env->external_htab) {
1123 ppc_store_sdr1(env, sregs.u.s.sdr1);
1126 /* Sync SLB */
1127 #ifdef TARGET_PPC64
1129 * The packed SLB array we get from KVM_GET_SREGS only contains
1130 * information about valid entries. So we flush our internal
1131 * copy to get rid of stale ones, then put all valid SLB entries
1132 * back in.
1134 memset(env->slb, 0, sizeof(env->slb));
1135 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1136 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1137 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1139 * Only restore valid entries
1141 if (rb & SLB_ESID_V) {
1142 ppc_store_slb(env, rb, rs);
1145 #endif
1147 /* Sync SRs */
1148 for (i = 0; i < 16; i++) {
1149 env->sr[i] = sregs.u.s.ppc32.sr[i];
1152 /* Sync BATs */
1153 for (i = 0; i < 8; i++) {
1154 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1155 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1156 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1157 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1161 if (cap_hior) {
1162 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1165 if (cap_one_reg) {
1166 int i;
1168 /* We deliberately ignore errors here: for kernels which have
1169 * the ONE_REG calls but don't support the specific
1170 * registers, there's a reasonable chance things will still
1171 * work, at least until we try to migrate. */
1172 for (i = 0; i < 1024; i++) {
1173 uint64_t id = env->spr_cb[i].one_reg_id;
1175 if (id != 0) {
1176 kvm_get_one_spr(cs, id, i);
1180 #ifdef TARGET_PPC64
1181 if (msr_ts) {
1182 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1183 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1185 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1186 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1188 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1189 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1190 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1191 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1192 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1193 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1194 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1195 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1196 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1197 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1200 if (cap_papr) {
1201 if (kvm_get_vpa(cs) < 0) {
1202 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1206 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1207 #endif
1210 return 0;
1213 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1215 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1217 if (irq != PPC_INTERRUPT_EXT) {
1218 return 0;
1221 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1222 return 0;
1225 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1227 return 0;
1230 #if defined(TARGET_PPCEMB)
1231 #define PPC_INPUT_INT PPC40x_INPUT_INT
1232 #elif defined(TARGET_PPC64)
1233 #define PPC_INPUT_INT PPC970_INPUT_INT
1234 #else
1235 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1236 #endif
1238 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1240 PowerPCCPU *cpu = POWERPC_CPU(cs);
1241 CPUPPCState *env = &cpu->env;
1242 int r;
1243 unsigned irq;
1245 qemu_mutex_lock_iothread();
1247 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1248 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1249 if (!cap_interrupt_level &&
1250 run->ready_for_interrupt_injection &&
1251 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1252 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1254 /* For now KVM disregards the 'irq' argument. However, in the
1255 * future KVM could cache it in-kernel to avoid a heavyweight exit
1256 * when reading the UIC.
1258 irq = KVM_INTERRUPT_SET;
1260 DPRINTF("injected interrupt %d\n", irq);
1261 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1262 if (r < 0) {
1263 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1266 /* Always wake up soon in case the interrupt was level based */
1267 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1268 (get_ticks_per_sec() / 50));
1271 /* We don't know if there are more interrupts pending after this. However,
1272 * the guest will return to userspace in the course of handling this one
1273 * anyway, so we will get a chance to deliver the rest. */
1275 qemu_mutex_unlock_iothread();
1278 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1280 return MEMTXATTRS_UNSPECIFIED;
1283 int kvm_arch_process_async_events(CPUState *cs)
1285 return cs->halted;
1288 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1290 CPUState *cs = CPU(cpu);
1291 CPUPPCState *env = &cpu->env;
1293 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1294 cs->halted = 1;
1295 cs->exception_index = EXCP_HLT;
1298 return 0;
1301 /* map dcr access to existing qemu dcr emulation */
1302 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1304 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1305 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1307 return 0;
1310 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1312 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1313 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1315 return 0;
1318 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1320 /* Mixed endian case is not handled */
1321 uint32_t sc = debug_inst_opcode;
1323 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1324 sizeof(sc), 0) ||
1325 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1326 return -EINVAL;
1329 return 0;
1332 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1334 uint32_t sc;
1336 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1337 sc != debug_inst_opcode ||
1338 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1339 sizeof(sc), 1)) {
1340 return -EINVAL;
1343 return 0;
1346 static int find_hw_breakpoint(target_ulong addr, int type)
1348 int n;
1350 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1351 <= ARRAY_SIZE(hw_debug_points));
1353 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1354 if (hw_debug_points[n].addr == addr &&
1355 hw_debug_points[n].type == type) {
1356 return n;
1360 return -1;
1363 static int find_hw_watchpoint(target_ulong addr, int *flag)
1365 int n;
1367 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1368 if (n >= 0) {
1369 *flag = BP_MEM_ACCESS;
1370 return n;
1373 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1374 if (n >= 0) {
1375 *flag = BP_MEM_WRITE;
1376 return n;
1379 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1380 if (n >= 0) {
1381 *flag = BP_MEM_READ;
1382 return n;
1385 return -1;
1388 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1389 target_ulong len, int type)
1391 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1392 return -ENOBUFS;
1395 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1396 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1398 switch (type) {
1399 case GDB_BREAKPOINT_HW:
1400 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1401 return -ENOBUFS;
1404 if (find_hw_breakpoint(addr, type) >= 0) {
1405 return -EEXIST;
1408 nb_hw_breakpoint++;
1409 break;
1411 case GDB_WATCHPOINT_WRITE:
1412 case GDB_WATCHPOINT_READ:
1413 case GDB_WATCHPOINT_ACCESS:
1414 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1415 return -ENOBUFS;
1418 if (find_hw_breakpoint(addr, type) >= 0) {
1419 return -EEXIST;
1422 nb_hw_watchpoint++;
1423 break;
1425 default:
1426 return -ENOSYS;
1429 return 0;
1432 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1433 target_ulong len, int type)
1435 int n;
1437 n = find_hw_breakpoint(addr, type);
1438 if (n < 0) {
1439 return -ENOENT;
1442 switch (type) {
1443 case GDB_BREAKPOINT_HW:
1444 nb_hw_breakpoint--;
1445 break;
1447 case GDB_WATCHPOINT_WRITE:
1448 case GDB_WATCHPOINT_READ:
1449 case GDB_WATCHPOINT_ACCESS:
1450 nb_hw_watchpoint--;
1451 break;
1453 default:
1454 return -ENOSYS;
1456 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1458 return 0;
1461 void kvm_arch_remove_all_hw_breakpoints(void)
1463 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1466 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1468 int n;
1470 /* Software Breakpoint updates */
1471 if (kvm_sw_breakpoints_active(cs)) {
1472 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1475 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1476 <= ARRAY_SIZE(hw_debug_points));
1477 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1479 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1480 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1481 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1482 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1483 switch (hw_debug_points[n].type) {
1484 case GDB_BREAKPOINT_HW:
1485 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1486 break;
1487 case GDB_WATCHPOINT_WRITE:
1488 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1489 break;
1490 case GDB_WATCHPOINT_READ:
1491 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1492 break;
1493 case GDB_WATCHPOINT_ACCESS:
1494 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1495 KVMPPC_DEBUG_WATCH_READ;
1496 break;
1497 default:
1498 cpu_abort(cs, "Unsupported breakpoint type\n");
1500 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1505 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1507 CPUState *cs = CPU(cpu);
1508 CPUPPCState *env = &cpu->env;
1509 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1510 int handle = 0;
1511 int n;
1512 int flag = 0;
1514 if (cs->singlestep_enabled) {
1515 handle = 1;
1516 } else if (arch_info->status) {
1517 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1518 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1519 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1520 if (n >= 0) {
1521 handle = 1;
1523 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1524 KVMPPC_DEBUG_WATCH_WRITE)) {
1525 n = find_hw_watchpoint(arch_info->address, &flag);
1526 if (n >= 0) {
1527 handle = 1;
1528 cs->watchpoint_hit = &hw_watchpoint;
1529 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1530 hw_watchpoint.flags = flag;
1534 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1535 handle = 1;
1536 } else {
1537 /* QEMU is not able to handle the debug exception, so inject
1538 * a program exception into the guest;
1539 * yes, a program exception, NOT a debug exception !!
1540 * When QEMU is using debug resources then the debug exception must
1541 * always be set. To achieve this we set MSR_DE and also set
1542 * MSRP_DEP so the guest cannot change MSR_DE.
1543 * When emulating debug resources for the guest we want the guest
1544 * to control MSR_DE (enable/disable the debug interrupt on demand).
1545 * Supporting both configurations at once is NOT possible.
1546 * So the result is that we cannot share debug resources
1547 * between QEMU and the guest on the BookE architecture.
1548 * In the current design QEMU gets priority over the guest:
1549 * if QEMU is using debug resources then the guest
1550 * cannot use them.
1551 * For software breakpoints QEMU uses a privileged instruction,
1552 * so there is no way we can be here because the guest set a
1553 * debug exception; the only possibility is that the guest executed
1554 * a privileged / illegal instruction, and that is why we are
1555 * injecting a program interrupt.
1558 cpu_synchronize_state(cs);
1559 /* env->nip is PC, so increment this by 4 to use
1560 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1562 env->nip += 4;
1563 cs->exception_index = POWERPC_EXCP_PROGRAM;
1564 env->error_code = POWERPC_EXCP_INVAL;
1565 ppc_cpu_do_interrupt(cs);
1568 return handle;
1571 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1573 PowerPCCPU *cpu = POWERPC_CPU(cs);
1574 CPUPPCState *env = &cpu->env;
1575 int ret;
1577 qemu_mutex_lock_iothread();
1579 switch (run->exit_reason) {
1580 case KVM_EXIT_DCR:
1581 if (run->dcr.is_write) {
1582 DPRINTF("handle dcr write\n");
1583 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1584 } else {
1585 DPRINTF("handle dcr read\n");
1586 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1588 break;
1589 case KVM_EXIT_HLT:
1590 DPRINTF("handle halt\n");
1591 ret = kvmppc_handle_halt(cpu);
1592 break;
1593 #if defined(TARGET_PPC64)
1594 case KVM_EXIT_PAPR_HCALL:
1595 DPRINTF("handle PAPR hypercall\n");
1596 run->papr_hcall.ret = spapr_hypercall(cpu,
1597 run->papr_hcall.nr,
1598 run->papr_hcall.args);
1599 ret = 0;
1600 break;
1601 #endif
1602 case KVM_EXIT_EPR:
1603 DPRINTF("handle epr\n");
1604 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1605 ret = 0;
1606 break;
1607 case KVM_EXIT_WATCHDOG:
1608 DPRINTF("handle watchdog expiry\n");
1609 watchdog_perform_action();
1610 ret = 0;
1611 break;
1613 case KVM_EXIT_DEBUG:
1614 DPRINTF("handle debug exception\n");
1615 if (kvm_handle_debug(cpu, run)) {
1616 ret = EXCP_DEBUG;
1617 break;
1619 /* re-enter, this exception was guest-internal */
1620 ret = 0;
1621 break;
1623 default:
1624 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1625 ret = -1;
1626 break;
1629 qemu_mutex_unlock_iothread();
1630 return ret;
1633 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1635 CPUState *cs = CPU(cpu);
1636 uint32_t bits = tsr_bits;
1637 struct kvm_one_reg reg = {
1638 .id = KVM_REG_PPC_OR_TSR,
1639 .addr = (uintptr_t) &bits,
1642 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1645 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1648 CPUState *cs = CPU(cpu);
1649 uint32_t bits = tsr_bits;
1650 struct kvm_one_reg reg = {
1651 .id = KVM_REG_PPC_CLEAR_TSR,
1652 .addr = (uintptr_t) &bits,
1655 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1658 int kvmppc_set_tcr(PowerPCCPU *cpu)
1660 CPUState *cs = CPU(cpu);
1661 CPUPPCState *env = &cpu->env;
1662 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1664 struct kvm_one_reg reg = {
1665 .id = KVM_REG_PPC_TCR,
1666 .addr = (uintptr_t) &tcr,
1669 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1672 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1674 CPUState *cs = CPU(cpu);
1675 int ret;
1677 if (!kvm_enabled()) {
1678 return -1;
1681 if (!cap_ppc_watchdog) {
1682 printf("warning: KVM does not support watchdog\n");
1683 return -1;
1686 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1687 if (ret < 0) {
1688 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1689 __func__, strerror(-ret));
1690 return ret;
1693 return ret;
1696 static int read_cpuinfo(const char *field, char *value, int len)
1698 FILE *f;
1699 int ret = -1;
1700 int field_len = strlen(field);
1701 char line[512];
1703 f = fopen("/proc/cpuinfo", "r");
1704 if (!f) {
1705 return -1;
1708 do {
1709 if (!fgets(line, sizeof(line), f)) {
1710 break;
1712 if (!strncmp(line, field, field_len)) {
1713 pstrcpy(value, len, line);
1714 ret = 0;
1715 break;
1717 } while(*line);
1719 fclose(f);
1721 return ret;
1724 uint32_t kvmppc_get_tbfreq(void)
1726 char line[512];
1727 char *ns;
1728 uint32_t retval = get_ticks_per_sec();
1730 if (read_cpuinfo("timebase", line, sizeof(line))) {
1731 return retval;
1734 if (!(ns = strchr(line, ':'))) {
1735 return retval;
1738 ns++;
1740 retval = atoi(ns);
1741 return retval;
1744 bool kvmppc_get_host_serial(char **value)
1746 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1747 NULL);
1750 bool kvmppc_get_host_model(char **value)
1752 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1755 /* Try to find a device tree node for a CPU with clock-frequency property */
1756 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1758 struct dirent *dirp;
1759 DIR *dp;
1761 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1762 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1763 return -1;
1766 buf[0] = '\0';
1767 while ((dirp = readdir(dp)) != NULL) {
1768 FILE *f;
1769 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1770 dirp->d_name);
1771 f = fopen(buf, "r");
1772 if (f) {
1773 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1774 fclose(f);
1775 break;
1777 buf[0] = '\0';
1779 closedir(dp);
1780 if (buf[0] == '\0') {
1781 printf("Unknown host!\n");
1782 return -1;
1785 return 0;
1788 /* Read a CPU node property from the host device tree that's a single
1789 * integer (32-bit or 64-bit). Returns -1 if it can't find or open
1790 * the property, and 0 if it doesn't understand the
1791 * format */
1792 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1794 char buf[PATH_MAX], *tmp;
1795 union {
1796 uint32_t v32;
1797 uint64_t v64;
1798 } u;
1799 FILE *f;
1800 int len;
1802 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1803 return -1;
1806 tmp = g_strdup_printf("%s/%s", buf, propname);
1808 f = fopen(tmp, "rb");
1809 g_free(tmp);
1810 if (!f) {
1811 return -1;
1814 len = fread(&u, 1, sizeof(u), f);
1815 fclose(f);
1816 switch (len) {
1817 case 4:
1818 /* property is a 32-bit quantity */
1819 return be32_to_cpu(u.v32);
1820 case 8:
1821 return be64_to_cpu(u.v64);
1824 return 0;
1827 uint64_t kvmppc_get_clockfreq(void)
1829 return kvmppc_read_int_cpu_dt("clock-frequency");
1832 uint32_t kvmppc_get_vmx(void)
1834 return kvmppc_read_int_cpu_dt("ibm,vmx");
1837 uint32_t kvmppc_get_dfp(void)
1839 return kvmppc_read_int_cpu_dt("ibm,dfp");
1842 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1844 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1845 CPUState *cs = CPU(cpu);
1847 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1848 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1849 return 0;
1852 return 1;
1855 int kvmppc_get_hasidle(CPUPPCState *env)
1857 struct kvm_ppc_pvinfo pvinfo;
1859 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1860 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1861 return 1;
1864 return 0;
1867 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1869 uint32_t *hc = (uint32_t*)buf;
1870 struct kvm_ppc_pvinfo pvinfo;
1872 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1873 memcpy(buf, pvinfo.hcall, buf_len);
1874 return 0;
1878 * Fallback to always fail hypercalls regardless of endianness:
1880 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1881 * li r3, -1
1882 * b .+8 (becomes nop in wrong endian)
1883 * bswap32(li r3, -1)
1886 hc[0] = cpu_to_be32(0x08000048);
1887 hc[1] = cpu_to_be32(0x3860ffff);
1888 hc[2] = cpu_to_be32(0x48000008);
1889 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1891 return 0;
1894 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1896 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1899 void kvmppc_enable_logical_ci_hcalls(void)
1902 * FIXME: it would be nice if we could detect the cases where
1903 * we're using a device which requires the in-kernel
1904 * implementation of these hcalls but the kernel lacks them, and
1905 * produce a warning.
1907 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1908 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1911 void kvmppc_set_papr(PowerPCCPU *cpu)
1913 CPUState *cs = CPU(cpu);
1914 int ret;
1916 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1917 if (ret) {
1918 cpu_abort(cs, "This KVM version does not support PAPR\n");
1921 /* Update the capability flag so we sync the right information
1922 * with kvm */
1923 cap_papr = 1;
1926 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1928 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1931 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1933 CPUState *cs = CPU(cpu);
1934 int ret;
1936 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1937 if (ret && mpic_proxy) {
1938 cpu_abort(cs, "This KVM version does not support EPR\n");
1942 int kvmppc_smt_threads(void)
1944 return cap_ppc_smt ? cap_ppc_smt : 1;
1947 #ifdef TARGET_PPC64
1948 off_t kvmppc_alloc_rma(void **rma)
1950 off_t size;
1951 int fd;
1952 struct kvm_allocate_rma ret;
1954 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1955 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1956 * not necessary on this hardware
1957 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1959 * FIXME: We should allow the user to force contiguous RMA
1960 * allocation in the cap_ppc_rma==1 case.
1962 if (cap_ppc_rma < 2) {
1963 return 0;
1966 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1967 if (fd < 0) {
1968 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1969 strerror(errno));
1970 return -1;
1973 size = MIN(ret.rma_size, 256ul << 20);
1975 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1976 if (*rma == MAP_FAILED) {
1977 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1978 return -1;
1981 return size;
1984 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1986 struct kvm_ppc_smmu_info info;
1987 long rampagesize, best_page_shift;
1988 int i;
1990 if (cap_ppc_rma >= 2) {
1991 return current_size;
1994 /* Find the largest hardware supported page size that's less than
1995 * or equal to the (logical) backing page size of guest RAM */
1996 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1997 rampagesize = getrampagesize();
1998 best_page_shift = 0;
2000 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2001 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2003 if (!sps->page_shift) {
2004 continue;
2007 if ((sps->page_shift > best_page_shift)
2008 && ((1UL << sps->page_shift) <= rampagesize)) {
2009 best_page_shift = sps->page_shift;
2013 return MIN(current_size,
2014 1ULL << (best_page_shift + hash_shift - 7));
2016 #endif
2018 bool kvmppc_spapr_use_multitce(void)
2020 return cap_spapr_multitce;
2023 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2024 bool vfio_accel)
2026 struct kvm_create_spapr_tce args = {
2027 .liobn = liobn,
2028 .window_size = window_size,
2030 long len;
2031 int fd;
2032 void *table;
2034 /* Must set fd to -1 so we don't try to munmap when called for
2035 * destroying the table, which the upper layers -will- do
2037 *pfd = -1;
2038 if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
2039 return NULL;
2042 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2043 if (fd < 0) {
2044 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2045 liobn);
2046 return NULL;
2049 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2050 /* FIXME: round this up to page size */
2052 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2053 if (table == MAP_FAILED) {
2054 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2055 liobn);
2056 close(fd);
2057 return NULL;
2060 *pfd = fd;
2061 return table;
2064 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2066 long len;
2068 if (fd < 0) {
2069 return -1;
2072 len = nb_table * sizeof(uint64_t);
2073 if ((munmap(table, len) < 0) ||
2074 (close(fd) < 0)) {
2075 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2076 strerror(errno));
2077 /* Leak the table */
2080 return 0;
2083 int kvmppc_reset_htab(int shift_hint)
2085 uint32_t shift = shift_hint;
2087 if (!kvm_enabled()) {
2088 /* Full emulation, tell caller to allocate htab itself */
2089 return 0;
2091 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2092 int ret;
2093 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2094 if (ret == -ENOTTY) {
2095 /* At least some versions of PR KVM advertise the
2096 * capability, but don't implement the ioctl(). Oops.
2097 * Return 0 so that we allocate the htab in qemu, as is
2098 * correct for PR. */
2099 return 0;
2100 } else if (ret < 0) {
2101 return ret;
2103 return shift;
2106 /* We have a kernel that predates the htab reset calls. For PR
2107 * KVM, we need to allocate the htab ourselves; an HV KVM of
2108 * this era has already allocated a 16MB fixed size hash
2109 * table. Kernels of this era have the GET_PVINFO capability
2110 * only on PR, so we use this hack to determine the right
2111 * answer */
2112 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2113 /* PR - tell caller to allocate htab */
2114 return 0;
2115 } else {
2116 /* HV - assume 16MB kernel allocated htab */
2117 return 24;
2121 static inline uint32_t mfpvr(void)
2123 uint32_t pvr;
2125 asm ("mfpvr %0"
2126 : "=r"(pvr));
2127 return pvr;
2130 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2132 if (on) {
2133 *word |= flags;
2134 } else {
2135 *word &= ~flags;
2139 static void kvmppc_host_cpu_initfn(Object *obj)
2141 assert(kvm_enabled());
2144 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2146 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2147 uint32_t vmx = kvmppc_get_vmx();
2148 uint32_t dfp = kvmppc_get_dfp();
2149 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2150 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2152 /* Now fix up the class with information we can query from the host */
2153 pcc->pvr = mfpvr();
2155 if (vmx != -1) {
2156 /* Only override when we know what the host supports */
2157 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2158 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2160 if (dfp != -1) {
2161 /* Only override when we know what the host supports */
2162 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2165 if (dcache_size != -1) {
2166 pcc->l1_dcache_size = dcache_size;
2169 if (icache_size != -1) {
2170 pcc->l1_icache_size = icache_size;
2174 bool kvmppc_has_cap_epr(void)
2176 return cap_epr;
2179 bool kvmppc_has_cap_htab_fd(void)
2181 return cap_htab_fd;
2184 bool kvmppc_has_cap_fixup_hcalls(void)
2186 return cap_fixup_hcalls;
2189 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2191 ObjectClass *oc = OBJECT_CLASS(pcc);
2193 while (oc && !object_class_is_abstract(oc)) {
2194 oc = object_class_get_parent(oc);
2196 assert(oc);
2198 return POWERPC_CPU_CLASS(oc);
2201 static int kvm_ppc_register_host_cpu_type(void)
2203 TypeInfo type_info = {
2204 .name = TYPE_HOST_POWERPC_CPU,
2205 .instance_init = kvmppc_host_cpu_initfn,
2206 .class_init = kvmppc_host_cpu_class_init,
2208 uint32_t host_pvr = mfpvr();
2209 PowerPCCPUClass *pvr_pcc;
2210 DeviceClass *dc;
2212 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2213 if (pvr_pcc == NULL) {
2214 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2216 if (pvr_pcc == NULL) {
2217 return -1;
2219 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2220 type_register(&type_info);
2222 /* Also register a generic CPU class for this CPU's family */
2223 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2224 dc = DEVICE_CLASS(pvr_pcc);
2225 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2226 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2227 type_register(&type_info);
2229 return 0;
2232 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2234 struct kvm_rtas_token_args args = {
2235 .token = token,
2238 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2239 return -ENOENT;
2242 strncpy(args.name, function, sizeof(args.name));
2244 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2247 int kvmppc_get_htab_fd(bool write)
2249 struct kvm_get_htab_fd s = {
2250 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2251 .start_index = 0,
2254 if (!cap_htab_fd) {
2255 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2256 return -1;
2259 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
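/* Stream HPTE chunks from the KVM HTAB fd into the migration stream; stop early once max_ns nanoseconds have elapsed (when max_ns >= 0). Returns 1 when the whole table has been read, 0 otherwise */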
2262 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2264 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2265 uint8_t buf[bufsize];
2266 ssize_t rc;
2268 do {
2269 rc = read(fd, buf, bufsize);
2270 if (rc < 0) {
2271 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2272 strerror(errno));
2273 return rc;
2274 } else if (rc) {
2275 uint8_t *buffer = buf;
2276 ssize_t n = rc;
2277 while (n) {
2278 struct kvm_get_htab_header *head =
2279 (struct kvm_get_htab_header *) buffer;
2280 size_t chunksize = sizeof(*head) +
2281 HASH_PTE_SIZE_64 * head->n_valid;
2283 qemu_put_be32(f, head->index);
2284 qemu_put_be16(f, head->n_valid);
2285 qemu_put_be16(f, head->n_invalid);
2286 qemu_put_buffer(f, (void *)(head + 1),
2287 HASH_PTE_SIZE_64 * head->n_valid);
2289 buffer += chunksize;
2290 n -= chunksize;
2293 } while ((rc != 0)
2294 && ((max_ns < 0)
2295 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2297 return (rc == 0) ? 1 : 0;
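/* Write one HPTE chunk from the incoming migration stream back into KVM via the HTAB fd */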
2300 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2301 uint16_t n_valid, uint16_t n_invalid)
2303 struct kvm_get_htab_header *buf;
2304 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2305 ssize_t rc;
2307 buf = alloca(chunksize);
2308 buf->index = index;
2309 buf->n_valid = n_valid;
2310 buf->n_invalid = n_invalid;
2312 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2314 rc = write(fd, buf, chunksize);
2315 if (rc < 0) {
2316 fprintf(stderr, "Error writing KVM hash table: %s\n",
2317 strerror(errno));
2318 return rc;
2320 if (rc != chunksize) {
2321 /* We should never get a short write on a single chunk */
2322 fprintf(stderr, "Short write, restoring KVM hash table\n");
2323 return -1;
2325 return 0;
2328 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2330 return true;
2333 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2335 return 1;
2338 int kvm_arch_on_sigbus(int code, void *addr)
2340 return 1;
2343 void kvm_arch_init_irq_routing(KVMState *s)
2347 struct kvm_get_htab_buf {
2348 struct kvm_get_htab_header header;
2350 * We require one extra byte for read
2352 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
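/* Read one HPTE group through a temporary KVM HTAB fd; the returned token points at the buffer's hpte array and must be released with kvmppc_hash64_free_pteg() */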
2355 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2357 int htab_fd;
2358 struct kvm_get_htab_fd ghf;
2359 struct kvm_get_htab_buf *hpte_buf;
2361 ghf.flags = 0;
2362 ghf.start_index = pte_index;
2363 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2364 if (htab_fd < 0) {
2365 goto error_out;
2368 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2370 * Read the hpte group
2372 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2373 goto out_close;
2376 close(htab_fd);
2377 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2379 out_close:
2380 g_free(hpte_buf);
2381 close(htab_fd);
2382 error_out:
2383 return 0;
2386 void kvmppc_hash64_free_pteg(uint64_t token)
2388 struct kvm_get_htab_buf *htab_buf;
2390 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2391 hpte);
2392 g_free(htab_buf);
2393 return;
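/* Write a single HPTE (pte0/pte1) at pte_index through the KVM HTAB fd */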
2396 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2397 target_ulong pte0, target_ulong pte1)
2399 int htab_fd;
2400 struct kvm_get_htab_fd ghf;
2401 struct kvm_get_htab_buf hpte_buf;
2403 ghf.flags = 0;
2404 ghf.start_index = 0; /* Ignored */
2405 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2406 if (htab_fd < 0) {
2407 goto error_out;
2410 hpte_buf.header.n_valid = 1;
2411 hpte_buf.header.n_invalid = 0;
2412 hpte_buf.header.index = pte_index;
2413 hpte_buf.hpte[0] = pte0;
2414 hpte_buf.hpte[1] = pte1;
2416 * Write the hpte entry.
2417 * CAUTION: write() has the warn_unused_result attribute. Hence we
2418 * need to check the return value, even though we do nothing.
2420 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2421 goto out_close;
2424 out_close:
2425 close(htab_fd);
2426 return;
2428 error_out:
2429 return;
2432 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2433 uint64_t address, uint32_t data)
2435 return 0;
2438 int kvm_arch_msi_data_to_gsi(uint32_t data)
2440 return data & 0xffff;