ivshmem: Replace int role_val by OnOffAuto master
[qemu/ar7.git] / target-ppc / kvm.c
blob2fc993143e98d9943a42c67548294980f5431c3d
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "kvm_ppc.h"
31 #include "cpu.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
41 #include "trace.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
//#define DEBUG_KVM

#ifdef DEBUG_KVM
/* Debug tracing to stderr; compiled to a no-op unless DEBUG_KVM is defined. */
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

/* procfs path used to enumerate host CPU nodes from the device tree */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
/* No PPC-specific KVM capabilities are strictly required; optional ones
 * are probed in kvm_arch_init() below. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
/* Cached results of KVM capability probes, filled in by kvm_arch_init(). */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;       /* not probed; set later by kvmppc_set_papr() */
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Instruction KVM uses for software breakpoints (KVM_REG_PPC_DEBUG_INST) */
static uint32_t debug_inst_opcode;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
92 static void kvm_kick_cpu(void *opaque)
94 PowerPCCPU *cpu = opaque;
96 qemu_cpu_kick(CPU(cpu));
99 static int kvm_ppc_register_host_cpu_type(void);
101 int kvm_arch_init(MachineState *ms, KVMState *s)
103 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
104 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
105 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
106 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
107 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
108 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
109 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
110 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
111 cap_spapr_vfio = false;
112 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
113 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
114 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
115 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
116 /* Note: we don't set cap_papr here, because this capability is
117 * only activated after this by kvmppc_set_papr() */
118 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
119 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
121 if (!cap_interrupt_level) {
122 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
123 "VM to stall at times!\n");
126 kvm_ppc_register_host_cpu_type();
128 return 0;
131 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
133 CPUPPCState *cenv = &cpu->env;
134 CPUState *cs = CPU(cpu);
135 struct kvm_sregs sregs;
136 int ret;
138 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
139 /* What we're really trying to say is "if we're on BookE, we use
140 the native PVR for now". This is the only sane way to check
141 it though, so we potentially confuse users that they can run
142 BookE guests on BookS. Let's hope nobody dares enough :) */
143 return 0;
144 } else {
145 if (!cap_segstate) {
146 fprintf(stderr, "kvm error: missing PVR setting capability\n");
147 return -ENOSYS;
151 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
152 if (ret) {
153 return ret;
156 sregs.pvr = cenv->spr[SPR_PVR];
157 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
160 /* Set up a shared TLB array with KVM */
161 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
163 CPUPPCState *env = &cpu->env;
164 CPUState *cs = CPU(cpu);
165 struct kvm_book3e_206_tlb_params params = {};
166 struct kvm_config_tlb cfg = {};
167 unsigned int entries = 0;
168 int ret, i;
170 if (!kvm_enabled() ||
171 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
172 return 0;
175 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
177 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
178 params.tlb_sizes[i] = booke206_tlb_size(env, i);
179 params.tlb_ways[i] = booke206_tlb_ways(env, i);
180 entries += params.tlb_sizes[i];
183 assert(entries == env->nb_tlb);
184 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
186 env->tlb_dirty = true;
188 cfg.array = (uintptr_t)env->tlb.tlbm;
189 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
190 cfg.params = (uintptr_t)&params;
191 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
193 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
194 if (ret < 0) {
195 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
196 __func__, strerror(-ret));
197 return ret;
200 env->kvm_sw_tlb = true;
201 return 0;
205 #if defined(TARGET_PPC64)
/*
 * Synthesize SMMU page-size information for kernels that predate the
 * KVM_PPC_GET_SMMU_INFO ioctl. Fills @info entirely (it is zeroed first).
 */
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteritics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows supports for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
295 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
297 CPUState *cs = CPU(cpu);
298 int ret;
300 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
301 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
302 if (ret == 0) {
303 return;
307 kvm_get_fallback_smmu_info(cpu, info);
310 static long gethugepagesize(const char *mem_path)
312 struct statfs fs;
313 int ret;
315 do {
316 ret = statfs(mem_path, &fs);
317 } while (ret != 0 && errno == EINTR);
319 if (ret != 0) {
320 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
321 strerror(errno));
322 exit(1);
325 #define HUGETLBFS_MAGIC 0x958458f6
327 if (fs.f_type != HUGETLBFS_MAGIC) {
328 /* Explicit mempath, but it's ordinary pages */
329 return getpagesize();
332 /* It's hugepage, return the huge page size */
333 return fs.f_bsize;
337 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
338 * may or may not name the same files / on the same filesystem now as
339 * when we actually open and map them. Iterate over the file
340 * descriptors instead, and use qemu_fd_getpagesize().
342 static int find_max_supported_pagesize(Object *obj, void *opaque)
344 char *mem_path;
345 long *hpsize_min = opaque;
347 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
348 mem_path = object_property_get_str(obj, "mem-path", NULL);
349 if (mem_path) {
350 long hpsize = gethugepagesize(mem_path);
351 if (hpsize < *hpsize_min) {
352 *hpsize_min = hpsize;
354 } else {
355 *hpsize_min = getpagesize();
359 return 0;
362 static long getrampagesize(void)
364 long hpsize = LONG_MAX;
365 Object *memdev_root;
367 if (mem_path) {
368 return gethugepagesize(mem_path);
371 /* it's possible we have memory-backend objects with
372 * hugepage-backed RAM. these may get mapped into system
373 * address space via -numa parameters or memory hotplug
374 * hooks. we want to take these into account, but we
375 * also want to make sure these supported hugepage
376 * sizes are applicable across the entire range of memory
377 * we may boot from, so we take the min across all
378 * backends, and assume normal pages in cases where a
379 * backend isn't backed by hugepages.
381 memdev_root = object_resolve_path("/objects", NULL);
382 if (!memdev_root) {
383 return getpagesize();
386 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
388 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
391 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
393 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
394 return true;
397 return (1ul << shift) <= rampgsize;
/*
 * Intersect the CPU's advertised page sizes with what KVM and the RAM
 * backing store actually support, rewriting env->sps in place. The SMMU
 * info is queried from the kernel once and cached in function statics.
 */
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;   /* q = QEMU-side index, k = KVM-side index */

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        /* Skip segment sizes the backing store can't satisfy. */
        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            /* Likewise filter the per-segment actual page encodings. */
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

/* Page-size fixup only applies to 64-bit server MMUs; no-op elsewhere. */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */
473 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
475 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Hardware breakpoints/watchpoints currently programmed into KVM */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
497 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
499 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
500 max_hw_breakpoint = 2;
501 max_hw_watchpoint = 2;
504 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
505 fprintf(stderr, "Error initializing h/w breakpoints\n");
506 return;
/*
 * Per-vCPU init hook: fix up page sizes, sync the PVR to KVM, arm the
 * idle-kick workaround timer, and set up optional per-MMU features.
 * Returns 0 on success or a negative errno from the sregs sync / TLB init.
 */
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        if (ret == -EINVAL) {
            error_report("Register sync failed... If you're using kvm-hv.ko,"
                         " only \"-cpu host\" is possible");
        }
        return ret;
    }

    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    /* NOTE(review): return value deliberately ignored — older kernels may
     * not support this reg, and sw breakpoints then simply stay unusable. */
    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}
546 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
548 CPUPPCState *env = &cpu->env;
549 CPUState *cs = CPU(cpu);
550 struct kvm_dirty_tlb dirty_tlb;
551 unsigned char *bitmap;
552 int ret;
554 if (!env->kvm_sw_tlb) {
555 return;
558 bitmap = g_malloc((env->nb_tlb + 7) / 8);
559 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
561 dirty_tlb.bitmap = (uintptr_t)bitmap;
562 dirty_tlb.num_dirty = env->nb_tlb;
564 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
565 if (ret) {
566 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
567 __func__, strerror(-ret));
570 g_free(bitmap);
573 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
575 PowerPCCPU *cpu = POWERPC_CPU(cs);
576 CPUPPCState *env = &cpu->env;
577 union {
578 uint32_t u32;
579 uint64_t u64;
580 } val;
581 struct kvm_one_reg reg = {
582 .id = id,
583 .addr = (uintptr_t) &val,
585 int ret;
587 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
588 if (ret != 0) {
589 trace_kvm_failed_spr_get(spr, strerror(errno));
590 } else {
591 switch (id & KVM_REG_SIZE_MASK) {
592 case KVM_REG_SIZE_U32:
593 env->spr[spr] = val.u32;
594 break;
596 case KVM_REG_SIZE_U64:
597 env->spr[spr] = val.u64;
598 break;
600 default:
601 /* Don't handle this size yet */
602 abort();
607 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
609 PowerPCCPU *cpu = POWERPC_CPU(cs);
610 CPUPPCState *env = &cpu->env;
611 union {
612 uint32_t u32;
613 uint64_t u64;
614 } val;
615 struct kvm_one_reg reg = {
616 .id = id,
617 .addr = (uintptr_t) &val,
619 int ret;
621 switch (id & KVM_REG_SIZE_MASK) {
622 case KVM_REG_SIZE_U32:
623 val.u32 = env->spr[spr];
624 break;
626 case KVM_REG_SIZE_U64:
627 val.u64 = env->spr[spr];
628 break;
630 default:
631 /* Don't handle this size yet */
632 abort();
635 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
636 if (ret != 0) {
637 trace_kvm_failed_spr_set(spr, strerror(errno));
/*
 * Push floating point / VSX and Altivec state to KVM with ONE_REG calls.
 * FPRs and the upper VSR halves are packed into a 2x64-bit buffer whose
 * lane order depends on host endianness. Returns 0 or a negative errno.
 */
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            /* FPR occupies the first doubleword of the VSR on BE hosts,
             * the second on LE hosts. */
#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            /* With VSX the full 128-bit VSR is transferred, else just FPR. */
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
/*
 * Fetch floating point / VSX and Altivec state from KVM, the mirror of
 * kvm_put_fp(): FPR lives in one doubleword of the returned 128-bit
 * buffer depending on host endianness; the other half is the VSR top.
 * Returns 0 or a negative errno.
 */
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            /* With VSX the full 128-bit VSR is transferred, else just FPR. */
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
779 #if defined(TARGET_PPC64)
/*
 * Fetch the paravirt VPA, SLB shadow and dispatch trace log registration
 * state from KVM. The SLB shadow and DTL ONE_REG calls transfer an
 * (addr, size) pair in one go, which is why the asserts below check that
 * the size field sits exactly 8 bytes after the address field in env.
 * Returns 0 or a negative errno.
 */
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
/*
 * Push the paravirt VPA, SLB shadow and dispatch trace log registration
 * state to KVM. Ordering is significant: see the comment below. Returns
 * 0 or a negative errno.
 */
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    /* Deregister the master VPA last (see ordering comment above). */
    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
874 #endif /* TARGET_PPC64 */
/*
 * Push the Book3S segment state (PVR, SDR1, SLB, segment registers and
 * BATs) to KVM via KVM_SET_SREGS. Returns the ioctl result.
 */
int kvmppc_put_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int i;

    sregs.pvr = env->spr[SPR_PVR];

    sregs.u.s.sdr1 = env->spr[SPR_SDR1];

    /* Sync SLB */
#ifdef TARGET_PPC64
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
        /* Valid entries carry their index in the low bits of the ESID. */
        if (env->slb[i].esid & SLB_ESID_V) {
            sregs.u.s.ppc64.slb[i].slbe |= i;
        }
        sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        sregs.u.s.ppc32.sr[i] = env->sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* Beware. We have to swap upper and lower bits here */
        sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
            | env->DBAT[1][i];
        sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
            | env->IBAT[1][i];
    }

    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
}
914 int kvm_arch_put_registers(CPUState *cs, int level)
916 PowerPCCPU *cpu = POWERPC_CPU(cs);
917 CPUPPCState *env = &cpu->env;
918 struct kvm_regs regs;
919 int ret;
920 int i;
922 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
923 if (ret < 0) {
924 return ret;
927 regs.ctr = env->ctr;
928 regs.lr = env->lr;
929 regs.xer = cpu_read_xer(env);
930 regs.msr = env->msr;
931 regs.pc = env->nip;
933 regs.srr0 = env->spr[SPR_SRR0];
934 regs.srr1 = env->spr[SPR_SRR1];
936 regs.sprg0 = env->spr[SPR_SPRG0];
937 regs.sprg1 = env->spr[SPR_SPRG1];
938 regs.sprg2 = env->spr[SPR_SPRG2];
939 regs.sprg3 = env->spr[SPR_SPRG3];
940 regs.sprg4 = env->spr[SPR_SPRG4];
941 regs.sprg5 = env->spr[SPR_SPRG5];
942 regs.sprg6 = env->spr[SPR_SPRG6];
943 regs.sprg7 = env->spr[SPR_SPRG7];
945 regs.pid = env->spr[SPR_BOOKE_PID];
947 for (i = 0;i < 32; i++)
948 regs.gpr[i] = env->gpr[i];
950 regs.cr = 0;
951 for (i = 0; i < 8; i++) {
952 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
955 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
956 if (ret < 0)
957 return ret;
959 kvm_put_fp(cs);
961 if (env->tlb_dirty) {
962 kvm_sw_tlb_put(cpu);
963 env->tlb_dirty = false;
966 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
967 ret = kvmppc_put_books_sregs(cpu);
968 if (ret < 0) {
969 return ret;
973 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
974 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
977 if (cap_one_reg) {
978 int i;
980 /* We deliberately ignore errors here, for kernels which have
981 * the ONE_REG calls, but don't support the specific
982 * registers, there's a reasonable chance things will still
983 * work, at least until we try to migrate. */
984 for (i = 0; i < 1024; i++) {
985 uint64_t id = env->spr_cb[i].one_reg_id;
987 if (id != 0) {
988 kvm_put_one_spr(cs, id, i);
992 #ifdef TARGET_PPC64
993 if (msr_ts) {
994 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
995 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
997 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
998 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1000 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1001 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1002 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1003 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1004 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1005 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1006 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1007 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1008 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1009 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1012 if (cap_papr) {
1013 if (kvm_put_vpa(cs) < 0) {
1014 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1018 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1019 #endif /* TARGET_PPC64 */
1022 return ret;
1025 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1027 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
/*
 * Fetch BookE special registers from KVM and mirror them into env->spr[],
 * honoring the feature bitmap in sregs.u.e.features so only state the
 * kernel actually provided is consumed. Returns 0 or a negative errno.
 */
static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        /* The 64-bit timebase is split into the TBL/TBU SPR pair. */
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        /* Each IVOR update also refreshes the cached exception vector. */
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);

        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        /* mas7_3 packs MAS7 (high word) and MAS3 (low word). */
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}
/* Fetch the Book3S (server-class MMU) special registers from KVM and
 * mirror them into QEMU's CPU state: SDR1, the SLB (64-bit only),
 * segment registers and BATs.
 * Returns 0 on success or the negative errno from KVM_GET_SREGS. */
static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    /* Only update SDR1 when QEMU owns the hash table. */
    if (!env->external_htab) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            /* Low 12 bits of rb carry the SLB index, the rest the ESID. */
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs */
    for (i = 0; i < 8; i++) {
        /* KVM packs BATu/BATl into one 64-bit word; split into QEMU's
         * separate upper/lower 32-bit arrays. */
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}
/* Pull the complete vCPU register state out of KVM into env: GPRs,
 * CR, CTR/LR/XER/MSR/NIP, SPRGs, FP/VMX state, then the MMU-flavour
 * specific sregs and any ONE_REG-exposed SPRs (including TM state and
 * the VPA on 64-bit PAPR guests).
 * Returns 0 on success or a negative errno from the first failing
 * mandatory ioctl; ONE_REG reads are deliberately best-effort. */
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0)
        return ret;

    /* Unpack the 32-bit CR into the eight 4-bit crf fields. */
    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0;i < 32; i++)
        env->gpr[i] = regs.gpr[i];

    kvm_get_fp(cs);

    /* Embedded (BookE) vs server (Book3S) sregs layouts. */
    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        /* Transactional memory checkpoint state, only while a
         * transaction is active (msr_ts non-zero). */
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif
    }

    return 0;
}
1325 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1327 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1329 if (irq != PPC_INTERRUPT_EXT) {
1330 return 0;
1333 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1334 return 0;
1337 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1339 return 0;
/* Select the core input pin used for the external interrupt on the
 * emulated board family: 40x for embedded targets, 970 for 64-bit,
 * 6xx otherwise. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
/* Called with the vCPU about to enter the guest: on hosts without
 * level-triggered interrupt support, manually inject a pending
 * external interrupt and arm idle_timer so we come back soon to
 * re-check the (possibly still asserted) level. */
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (get_ticks_per_sec() / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
}
/* Nothing PPC-specific to do after a guest exit; report unspecified
 * memory transaction attributes. */
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}

/* The only async event relevant here is the halted state. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
1400 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1402 CPUState *cs = CPU(cpu);
1403 CPUPPCState *env = &cpu->env;
1405 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1406 cs->halted = 1;
1407 cs->exception_index = EXCP_HLT;
1410 return 0;
1413 /* map dcr access to existing qemu dcr emulation */
1414 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1416 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1417 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1419 return 0;
1422 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1424 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1425 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1427 return 0;
1430 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1432 /* Mixed endian case is not handled */
1433 uint32_t sc = debug_inst_opcode;
1435 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1436 sizeof(sc), 0) ||
1437 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1438 return -EINVAL;
1441 return 0;
1444 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1446 uint32_t sc;
1448 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1449 sc != debug_inst_opcode ||
1450 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1451 sizeof(sc), 1)) {
1452 return -EINVAL;
1455 return 0;
1458 static int find_hw_breakpoint(target_ulong addr, int type)
1460 int n;
1462 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1463 <= ARRAY_SIZE(hw_debug_points));
1465 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1466 if (hw_debug_points[n].addr == addr &&
1467 hw_debug_points[n].type == type) {
1468 return n;
1472 return -1;
1475 static int find_hw_watchpoint(target_ulong addr, int *flag)
1477 int n;
1479 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1480 if (n >= 0) {
1481 *flag = BP_MEM_ACCESS;
1482 return n;
1485 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1486 if (n >= 0) {
1487 *flag = BP_MEM_WRITE;
1488 return n;
1491 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1492 if (n >= 0) {
1493 *flag = BP_MEM_READ;
1494 return n;
1497 return -1;
/* gdbstub hook: record a new hardware breakpoint or watchpoint in the
 * local table. The slot is filled in before validation, but the
 * counter is only bumped once the per-kind limit and duplicate checks
 * pass, so a rejected request leaves no live entry.
 * Returns 0, -ENOBUFS (table/kind full), -EEXIST (duplicate) or
 * -ENOSYS (unknown GDB type). @len is currently unused. */
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}
/* gdbstub hook: remove the breakpoint/watchpoint matching addr+type.
 * The vacated slot is back-filled with the table's last entry so the
 * table stays dense (order is not preserved).
 * Returns 0, -ENOENT (not found) or -ENOSYS (unknown GDB type). */
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    /* Move the (former) last entry into the freed slot. */
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}
/* Drop every hardware breakpoint and watchpoint at once; since the
 * table is kept dense, resetting the counters is sufficient. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}
/* Translate QEMU's software-breakpoint list and hardware debug-point
 * table into the kvm_guest_debug control block before entering the
 * guest. Aborts via cpu_abort() on an impossible table entry. */
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        /* Map each GDB debug-point kind to KVM's KVMPPC_DEBUG_* type. */
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}
/* Handle a KVM_EXIT_DEBUG exit. Returns non-zero when the event
 * belongs to QEMU (gdbstub single-step, or a HW/SW breakpoint or
 * watchpoint we planted); returns 0 after reflecting a guest-caused
 * debug event back into the guest as a program interrupt. */
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        /* Hardware debug event: match it against our own table. */
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle debug exception, so inject
         * program exception to guest;
         * Yes program exception NOT debug exception !!
         * When QEMU is using debug resources then debug exception must
         * be always set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so guest cannot change MSR_DE.
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations are NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, only possibility is guest executed a
         * privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */
        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}
/* Top-level dispatcher for PPC-specific KVM exit reasons (DCR access,
 * halt, PAPR hypercalls, EPR reads, watchdog expiry, debug events).
 * Runs under the iothread lock. Returns 0 to re-enter the guest,
 * EXCP_DEBUG to hand control to the gdbstub, or -1 on an unknown exit
 * reason. */
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        /* The hypercall result is passed back to the guest in r3 via
         * the run structure. */
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        /* MPIC proxy: fetch the interrupt acknowledge value for the guest. */
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}
1745 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1747 CPUState *cs = CPU(cpu);
1748 uint32_t bits = tsr_bits;
1749 struct kvm_one_reg reg = {
1750 .id = KVM_REG_PPC_OR_TSR,
1751 .addr = (uintptr_t) &bits,
1754 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1757 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1760 CPUState *cs = CPU(cpu);
1761 uint32_t bits = tsr_bits;
1762 struct kvm_one_reg reg = {
1763 .id = KVM_REG_PPC_CLEAR_TSR,
1764 .addr = (uintptr_t) &bits,
1767 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1770 int kvmppc_set_tcr(PowerPCCPU *cpu)
1772 CPUState *cs = CPU(cpu);
1773 CPUPPCState *env = &cpu->env;
1774 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1776 struct kvm_one_reg reg = {
1777 .id = KVM_REG_PPC_TCR,
1778 .addr = (uintptr_t) &tcr,
1781 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1784 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1786 CPUState *cs = CPU(cpu);
1787 int ret;
1789 if (!kvm_enabled()) {
1790 return -1;
1793 if (!cap_ppc_watchdog) {
1794 printf("warning: KVM does not support watchdog");
1795 return -1;
1798 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1799 if (ret < 0) {
1800 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1801 __func__, strerror(-ret));
1802 return ret;
1805 return ret;
/* Scan /proc/cpuinfo for a line starting with @field and copy it
 * (truncated to @len, NUL-terminated) into @value.
 * Returns 0 on a match, -1 when the file can't be opened or the field
 * is absent. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    /* Fix: loop directly on fgets() success instead of the old
     * do/while (*line), which re-tested stale buffer contents and
     * would stop early on a line starting with a NUL byte. */
    while (fgets(line, sizeof(line), f)) {
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    }

    fclose(f);

    return ret;
}
/* Return the host timebase frequency in Hz, parsed from the
 * "timebase" line of /proc/cpuinfo; falls back to the tick rate when
 * the line is missing or malformed. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    /* Fix: strtoul() instead of atoi() -- atoi() has undefined
     * behavior on out-of-range input and cannot report errors, and
     * timebase values can exceed INT_MAX. */
    return strtoul(ns + 1, NULL, 10);
}
/* Read host identity strings from the device tree. On success the
 * caller owns *value and must g_free() it; returns false on failure. */
bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}

bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
1866 /* Try to find a device tree node for a CPU with clock-frequency property */
1867 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1869 struct dirent *dirp;
1870 DIR *dp;
1872 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1873 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1874 return -1;
1877 buf[0] = '\0';
1878 while ((dirp = readdir(dp)) != NULL) {
1879 FILE *f;
1880 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1881 dirp->d_name);
1882 f = fopen(buf, "r");
1883 if (f) {
1884 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1885 fclose(f);
1886 break;
1888 buf[0] = '\0';
1890 closedir(dp);
1891 if (buf[0] == '\0') {
1892 printf("Unknown host!\n");
1893 return -1;
1896 return 0;
/* Read a big-endian integer property file from the host device tree.
 * Returns the value for 4- or 8-byte properties, (uint64_t)-1 when
 * the file can't be opened, and 0 for any other length. */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);

    if (len == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    }
    if (len == 8) {
        return be64_to_cpu(u.v64);
    }

    return 0;
}
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX], *tmp;
    uint64_t val;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        /* NOTE(review): this path returns (uint64_t)-1, not 0 -- the
         * header comment above appears stale; callers compare against
         * -1, so the comment is what's wrong. */
        return -1;
    }

    tmp = g_strdup_printf("%s/%s", buf, propname);
    val = kvmppc_read_int_dt(tmp);
    g_free(tmp);

    return val;
}
/* Host device-tree probes; each returns the property value, or the
 * error value from kvmppc_read_int_cpu_dt() when it is missing. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

/* "ibm,vmx": per kvmppc_host_cpu_class_init(), >0 means AltiVec,
 * >1 additionally means VSX. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

/* "ibm,dfp": non-zero when decimal floating point is present. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1961 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1963 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1964 CPUState *cs = CPU(cpu);
1966 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1967 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1968 return 0;
1971 return 1;
1974 int kvmppc_get_hasidle(CPUPPCState *env)
1976 struct kvm_ppc_pvinfo pvinfo;
1978 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1979 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1980 return 1;
1983 return 0;
/* Fill @buf with the hypercall instruction sequence the guest should
 * use, either from the kernel's pvinfo or a fail-safe fallback.
 * Always returns 0.
 * NOTE(review): the fallback path writes four 32-bit words regardless
 * of @buf_len -- assumes callers always pass buf_len >= 16; confirm. */
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);
        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 0;
}
/* Enable in-kernel handling of a single sPAPR hypercall. */
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}

/* Enable in-kernel H_LOGICAL_CI_{LOAD,STORE} (cache-inhibited access
 * for in-kernel devices). Failures are deliberately ignored. */
void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}

/* Let the kernel handle H_SET_MODE directly. */
void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}
/* Switch @cpu into PAPR (pseries) mode in the kernel. Exits QEMU when
 * the host kernel or this vCPU type cannot do PAPR. */
void kvmppc_set_papr(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    int ret;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
    if (ret) {
        error_report("This vCPU type or KVM version does not support PAPR");
        exit(1);
    }

    /* Update the capability flag so we sync the right information
     * with kvm */
    cap_papr = 1;
}
/* Set the architecture compatibility mode (logical PVR) for @cpu;
 * returns the ONE_REG result. */
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
}
2056 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2058 CPUState *cs = CPU(cpu);
2059 int ret;
2061 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2062 if (ret && mpic_proxy) {
2063 error_report("This KVM version does not support EPR");
2064 exit(1);
/* SMT threads per core supported by the host; 1 when the capability
 * was not reported. */
int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
2073 #ifdef TARGET_PPC64
/* Allocate a contiguous Real Mode Area through KVM when the hardware
 * requires one. On success stores the mapping in *rma and returns its
 * size (capped at 256MiB); returns 0 when no contiguous RMA is
 * needed, or -1 on allocation/mapping failure. */
off_t kvmppc_alloc_rma(void **rma)
{
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    return size;
}
/* Clamp the requested RMA size to what the host MMU can cover, based
 * on the largest hardware page size usable with the guest RAM backing
 * and the hash table shift. Returns current_size unchanged when the
 * hardware mandates its own contiguous RMA (cap_ppc_rma >= 2). */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        /* Unused table slots have page_shift == 0. */
        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    /* NOTE(review): the "- 7" encodes a fixed RMA:HPT sizing ratio --
     * confirm against the ISA/KVM documentation before changing. */
    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
2142 #endif
/* True when the kernel accelerates multi-TCE hypercalls
 * (KVM_CAP_SPAPR_MULTITCE was present at init). */
bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}
/* Create an in-kernel TCE (DMA translation) table for LIOBN @liobn
 * and mmap it into QEMU. On success stores the table fd in *pfd and
 * returns the mapping; returns NULL (with *pfd == -1) when in-kernel
 * acceleration is unavailable or creation/mapping fails. */
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
                              bool need_vfio)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    /* One 64-bit TCE entry per IOMMU page in the window. */
    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
2190 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2192 long len;
2194 if (fd < 0) {
2195 return -1;
2198 len = nb_table * sizeof(uint64_t);
2199 if ((munmap(table, len) < 0) ||
2200 (close(fd) < 0)) {
2201 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2202 strerror(errno));
2203 /* Leak the table */
2206 return 0;
/* Ask KVM to (re)allocate the guest hash page table using
 * @shift_hint (log2 of the size in bytes). Returns the shift actually
 * granted by the kernel, 0 when QEMU must allocate the htab itself
 * (TCG or PR KVM), or a negative errno on failure. */
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        /* The kernel may adjust the shift in place. */
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already. Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
/* Read the host Processor Version Register with the mfpvr
 * instruction (this file is only built for KVM on PowerPC hosts). */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
/* Set (@on true) or clear (@on false) the @flags bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
/* Instance init for the "host" CPU type; the type only exists when
 * running under KVM. */
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}
/* Class init for the "host" CPU type: fix up the class with values
 * probed from the real machine -- PVR, AltiVec/VSX/DFP availability
 * and L1 cache sizes from the device tree. */
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    /* -1 (wrapped to uint32_t) marks "property not found". */
    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
    dc->cannot_destroy_with_object_finalize_yet = true;
}
/* Accessors for KVM capabilities probed once at init time. */
bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}
2319 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2321 ObjectClass *oc = OBJECT_CLASS(pcc);
2323 while (oc && !object_class_is_abstract(oc)) {
2324 oc = object_class_get_parent(oc);
2326 assert(oc);
2328 return POWERPC_CPU_CLASS(oc);
/* Register the "host" CPU type whose class matches the real PVR, plus
 * a generic family alias type named after the family description.
 * Returns 0 on success, -1 when no CPU class matches the host PVR. */
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;
    DeviceClass *dc;

    /* Try an exact PVR match first, then a mask-based match. */
    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

    /* Register generic family CPU class for a family */
    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
    dc = DEVICE_CLASS(pvr_pcc);
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
    type_register(&type_info);

    return 0;
}
2362 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2364 struct kvm_rtas_token_args args = {
2365 .token = token,
2368 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2369 return -ENOENT;
2372 strncpy(args.name, function, sizeof(args.name));
2374 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
/* Open a KVM hash-table streaming fd covering the whole HTAB, in
 * read or write mode per @write. Returns the fd, or -1 when the
 * kernel lacks the capability. */
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}
/* Stream the guest hash page table from the KVM htab @fd into the
 * migration stream @f, chunk header by chunk header, until the fd
 * reports EOF or the @max_ns time budget (negative = unlimited) is
 * exhausted. Returns 1 when the table was fully drained, 0 when
 * interrupted by the time limit, or a negative errno on read failure.
 * NOTE(review): buf is a VLA sized by the caller-supplied bufsize --
 * assumes callers keep this well under the stack limit; confirm. */
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            /* Each read() returns a sequence of header+PTE chunks;
             * forward them one at a time in big-endian stream order. */
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                     HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
2430 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2431 uint16_t n_valid, uint16_t n_invalid)
2433 struct kvm_get_htab_header *buf;
2434 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2435 ssize_t rc;
2437 buf = alloca(chunksize);
2438 buf->index = index;
2439 buf->n_valid = n_valid;
2440 buf->n_invalid = n_invalid;
2442 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2444 rc = write(fd, buf, chunksize);
2445 if (rc < 0) {
2446 fprintf(stderr, "Error writing KVM hash table: %s\n",
2447 strerror(errno));
2448 return rc;
2450 if (rc != chunksize) {
2451 /* We should never get a short write on a single chunk */
2452 fprintf(stderr, "Short write, restoring KVM hash table\n");
2453 return -1;
2455 return 0;
2458 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2460 return true;
2463 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2465 return 1;
int kvm_arch_on_sigbus(int code, void *addr)
{
    /* Non-VCPU SIGBUS is likewise unhandled on PPC; the nonzero return
     * tells the generic KVM code the fault was not dealt with here. */
    return 1;
}
2473 void kvm_arch_init_irq_routing(KVMState *s)
/*
 * Buffer for reading back a single hash PTE group from the kernel
 * through the HTAB fd: one record header followed by the group's PTEs
 * (two target_ulong words per HPTE, HPTES_PER_GROUP HPTEs per group).
 */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
/*
 * Read the PTE group containing @pte_index from the in-kernel hash
 * table.
 *
 * Opens a fresh HTAB fd positioned at @pte_index, reads one
 * kvm_get_htab_buf worth of data, and returns a token: the address of
 * the buffer's hpte[] array cast through uintptr_t to uint64_t.
 * Ownership of the heap buffer transfers to the caller, who must
 * release it with kvmppc_hash64_free_pteg().
 *
 * Returns 0 if the fd could not be opened or the read failed.
 */
uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    /* NOTE(review): only read() < 0 is treated as failure; a short read
     * is not detected — presumably the kernel always returns at least a
     * full group per read. Confirm against KVM_PPC_GET_HTAB_FD. */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    close(htab_fd);
    /* Success: hand the buffer to the caller via the hpte[] token. */
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}
2516 void kvmppc_hash64_free_pteg(uint64_t token)
2518 struct kvm_get_htab_buf *htab_buf;
2520 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2521 hpte);
2522 g_free(htab_buf);
2523 return;
2526 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2527 target_ulong pte0, target_ulong pte1)
2529 int htab_fd;
2530 struct kvm_get_htab_fd ghf;
2531 struct kvm_get_htab_buf hpte_buf;
2533 ghf.flags = 0;
2534 ghf.start_index = 0; /* Ignored */
2535 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2536 if (htab_fd < 0) {
2537 goto error_out;
2540 hpte_buf.header.n_valid = 1;
2541 hpte_buf.header.n_invalid = 0;
2542 hpte_buf.header.index = pte_index;
2543 hpte_buf.hpte[0] = pte0;
2544 hpte_buf.hpte[1] = pte1;
2546 * Write the hpte entry.
2547 * CAUTION: write() has the warn_unused_result attribute. Hence we
2548 * need to check the return value, even though we do nothing.
2550 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2551 goto out_close;
2554 out_close:
2555 close(htab_fd);
2556 return;
2558 error_out:
2559 return;
2562 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2563 uint64_t address, uint32_t data, PCIDevice *dev)
2565 return 0;
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    /* The low 16 bits of the MSI data word select the GSI. */
    return data & 0xffff;
}
2573 int kvmppc_enable_hwrng(void)
2575 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2576 return -1;
2579 return kvmppc_enable_hcall(kvm_state, H_RANDOM);