trace: split out trace events for util/ directory
[qemu/ar7.git] / target-ppc / kvm.c
blob e14da60b77a21aed62e53037abc2d156e85d7037
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#if defined(TARGET_PPC64)
#include "hw/ppc/spapr_cpu_core.h"
#endif
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
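/* Availability of optional KVM capabilities, probed once in kvm_arch_init()
 * and consulted throughout this file */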
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Opcode of the software breakpoint instruction advertised by KVM
 * (queried via KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu()) */
static uint32_t debug_inst_opcode;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}
static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
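/* Push the vcpu's PVR to KVM via the SREGS interface (Book3S only; on BookE
 * the host PVR is kept, see the comment below) */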
134 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
136 CPUPPCState *cenv = &cpu->env;
137 CPUState *cs = CPU(cpu);
138 struct kvm_sregs sregs;
139 int ret;
141 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check it,
           though it may mislead users into thinking they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
146 return 0;
147 } else {
148 if (!cap_segstate) {
149 fprintf(stderr, "kvm error: missing PVR setting capability\n");
150 return -ENOSYS;
154 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
155 if (ret) {
156 return ret;
159 sregs.pvr = cenv->spr[SPR_PVR];
160 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
163 /* Set up a shared TLB array with KVM */
164 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
166 CPUPPCState *env = &cpu->env;
167 CPUState *cs = CPU(cpu);
168 struct kvm_book3e_206_tlb_params params = {};
169 struct kvm_config_tlb cfg = {};
170 unsigned int entries = 0;
171 int ret, i;
173 if (!kvm_enabled() ||
174 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
175 return 0;
178 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
180 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
181 params.tlb_sizes[i] = booke206_tlb_size(env, i);
182 params.tlb_ways[i] = booke206_tlb_ways(env, i);
183 entries += params.tlb_sizes[i];
186 assert(entries == env->nb_tlb);
187 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
189 env->tlb_dirty = true;
191 cfg.array = (uintptr_t)env->tlb.tlbm;
192 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
193 cfg.params = (uintptr_t)&params;
194 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
196 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
197 if (ret < 0) {
198 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
199 __func__, strerror(-ret));
200 return ret;
203 env->kvm_sw_tlb = true;
204 return 0;
208 #if defined(TARGET_PPC64)
209 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
210 struct kvm_ppc_smmu_info *info)
212 CPUPPCState *env = &cpu->env;
213 CPUState *cs = CPU(cpu);
215 memset(info, 0, sizeof(*info));
    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
240 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
241 /* No flags */
242 info->flags = 0;
243 info->slb_size = 64;
245 /* Standard 4k base page size segment */
246 info->sps[0].page_shift = 12;
247 info->sps[0].slb_enc = 0;
248 info->sps[0].enc[0].page_shift = 12;
249 info->sps[0].enc[0].pte_enc = 0;
251 /* Standard 16M large page size segment */
252 info->sps[1].page_shift = 24;
253 info->sps[1].slb_enc = SLB_VSID_L;
254 info->sps[1].enc[0].page_shift = 24;
255 info->sps[1].enc[0].pte_enc = 0;
256 } else {
257 int i = 0;
259 /* HV KVM has backing store size restrictions */
260 info->flags = KVM_PPC_PAGE_SIZES_REAL;
262 if (env->mmu_model & POWERPC_MMU_1TSEG) {
263 info->flags |= KVM_PPC_1T_SEGMENTS;
266 if (env->mmu_model == POWERPC_MMU_2_06 ||
267 env->mmu_model == POWERPC_MMU_2_07) {
268 info->slb_size = 32;
269 } else {
270 info->slb_size = 64;
273 /* Standard 4k base page size segment */
274 info->sps[i].page_shift = 12;
275 info->sps[i].slb_enc = 0;
276 info->sps[i].enc[0].page_shift = 12;
277 info->sps[i].enc[0].pte_enc = 0;
278 i++;
280 /* 64K on MMU 2.06 and later */
281 if (env->mmu_model == POWERPC_MMU_2_06 ||
282 env->mmu_model == POWERPC_MMU_2_07) {
283 info->sps[i].page_shift = 16;
284 info->sps[i].slb_enc = 0x110;
285 info->sps[i].enc[0].page_shift = 16;
286 info->sps[i].enc[0].pte_enc = 1;
287 i++;
290 /* Standard 16M large page size segment */
291 info->sps[i].page_shift = 24;
292 info->sps[i].slb_enc = SLB_VSID_L;
293 info->sps[i].enc[0].page_shift = 24;
294 info->sps[i].enc[0].pte_enc = 0;
298 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
300 CPUState *cs = CPU(cpu);
301 int ret;
303 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
304 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
305 if (ret == 0) {
306 return;
310 kvm_get_fallback_smmu_info(cpu, info);
313 static long gethugepagesize(const char *mem_path)
315 struct statfs fs;
316 int ret;
318 do {
319 ret = statfs(mem_path, &fs);
320 } while (ret != 0 && errno == EINTR);
322 if (ret != 0) {
323 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
324 strerror(errno));
325 exit(1);
328 #define HUGETLBFS_MAGIC 0x958458f6
330 if (fs.f_type != HUGETLBFS_MAGIC) {
331 /* Explicit mempath, but it's ordinary pages */
332 return getpagesize();
335 /* It's hugepage, return the huge page size */
336 return fs.f_bsize;
/*
 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 * may or may not name the same files / on the same filesystem now as
 * when we actually open and map them.  Iterate over the file
 * descriptors instead, and use qemu_fd_getpagesize().
 */
345 static int find_max_supported_pagesize(Object *obj, void *opaque)
347 char *mem_path;
348 long *hpsize_min = opaque;
350 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
351 mem_path = object_property_get_str(obj, "mem-path", NULL);
352 if (mem_path) {
353 long hpsize = gethugepagesize(mem_path);
354 if (hpsize < *hpsize_min) {
355 *hpsize_min = hpsize;
357 } else {
358 *hpsize_min = getpagesize();
362 return 0;
365 static long getrampagesize(void)
367 long hpsize = LONG_MAX;
368 Object *memdev_root;
370 if (mem_path) {
371 return gethugepagesize(mem_path);
374 /* it's possible we have memory-backend objects with
375 * hugepage-backed RAM. these may get mapped into system
376 * address space via -numa parameters or memory hotplug
377 * hooks. we want to take these into account, but we
378 * also want to make sure these supported hugepage
379 * sizes are applicable across the entire range of memory
380 * we may boot from, so we take the min across all
381 * backends, and assume normal pages in cases where a
382 * backend isn't backed by hugepages.
384 memdev_root = object_resolve_path("/objects", NULL);
385 if (!memdev_root) {
386 return getpagesize();
389 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
391 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
394 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
396 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
397 return true;
400 return (1ul << shift) <= rampgsize;
403 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
405 static struct kvm_ppc_smmu_info smmu_info;
406 static bool has_smmu_info;
407 CPUPPCState *env = &cpu->env;
408 long rampagesize;
409 int iq, ik, jq, jk;
411 /* We only handle page sizes for 64-bit server guests for now */
412 if (!(env->mmu_model & POWERPC_MMU_64)) {
413 return;
416 /* Collect MMU info from kernel if not already */
417 if (!has_smmu_info) {
418 kvm_get_smmu_info(cpu, &smmu_info);
419 has_smmu_info = true;
422 rampagesize = getrampagesize();
424 /* Convert to QEMU form */
425 memset(&env->sps, 0, sizeof(env->sps));
427 /* If we have HV KVM, we need to forbid CI large pages if our
428 * host page size is smaller than 64K.
430 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
431 env->ci_large_pages = getpagesize() >= 0x10000;
435 * XXX This loop should be an entry wide AND of the capabilities that
436 * the selected CPU has with the capabilities that KVM supports.
438 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
439 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
440 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
442 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
443 ksps->page_shift)) {
444 continue;
446 qsps->page_shift = ksps->page_shift;
447 qsps->slb_enc = ksps->slb_enc;
448 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
449 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
450 ksps->enc[jk].page_shift)) {
451 continue;
453 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
454 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
455 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
456 break;
459 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
460 break;
463 env->slb_nr = smmu_info.slb_size;
464 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
465 env->mmu_model &= ~POWERPC_MMU_1TSEG;
468 #else /* defined (TARGET_PPC64) */
470 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
474 #endif /* !defined (TARGET_PPC64) */
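/* KVM identifies vCPUs by the same id that is exposed to the guest in the
 * device tree */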
476 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
478 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4
487 static struct HWBreakpoint {
488 target_ulong addr;
489 int type;
490 } hw_debug_points[MAX_HW_BKPTS];
492 static CPUWatchpoint hw_watchpoint;
/* No hardware breakpoints or watchpoints are supported by default */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
500 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
502 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
503 max_hw_breakpoint = 2;
504 max_hw_watchpoint = 2;
507 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
508 fprintf(stderr, "Error initializing h/w breakpoints\n");
509 return;
513 int kvm_arch_init_vcpu(CPUState *cs)
515 PowerPCCPU *cpu = POWERPC_CPU(cs);
516 CPUPPCState *cenv = &cpu->env;
517 int ret;
519 /* Gather server mmu info from KVM and update the CPU state */
520 kvm_fixup_page_sizes(cpu);
522 /* Synchronize sregs with kvm */
523 ret = kvm_arch_sync_sregs(cpu);
524 if (ret) {
525 if (ret == -EINVAL) {
526 error_report("Register sync failed... If you're using kvm-hv.ko,"
527 " only \"-cpu host\" is possible");
529 return ret;
532 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
534 /* Some targets support access to KVM's guest TLB. */
535 switch (cenv->mmu_model) {
536 case POWERPC_MMU_BOOKE206:
537 ret = kvm_booke206_tlb_init(cpu);
538 break;
539 default:
540 break;
543 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
544 kvmppc_hw_debug_points_init(cenv);
546 return ret;
549 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
551 CPUPPCState *env = &cpu->env;
552 CPUState *cs = CPU(cpu);
553 struct kvm_dirty_tlb dirty_tlb;
554 unsigned char *bitmap;
555 int ret;
557 if (!env->kvm_sw_tlb) {
558 return;
561 bitmap = g_malloc((env->nb_tlb + 7) / 8);
562 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
564 dirty_tlb.bitmap = (uintptr_t)bitmap;
565 dirty_tlb.num_dirty = env->nb_tlb;
567 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
568 if (ret) {
569 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
570 __func__, strerror(-ret));
573 g_free(bitmap);
576 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
578 PowerPCCPU *cpu = POWERPC_CPU(cs);
579 CPUPPCState *env = &cpu->env;
580 union {
581 uint32_t u32;
582 uint64_t u64;
583 } val;
584 struct kvm_one_reg reg = {
585 .id = id,
586 .addr = (uintptr_t) &val,
588 int ret;
590 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
591 if (ret != 0) {
592 trace_kvm_failed_spr_get(spr, strerror(errno));
593 } else {
594 switch (id & KVM_REG_SIZE_MASK) {
595 case KVM_REG_SIZE_U32:
596 env->spr[spr] = val.u32;
597 break;
599 case KVM_REG_SIZE_U64:
600 env->spr[spr] = val.u64;
601 break;
603 default:
604 /* Don't handle this size yet */
605 abort();
610 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
612 PowerPCCPU *cpu = POWERPC_CPU(cs);
613 CPUPPCState *env = &cpu->env;
614 union {
615 uint32_t u32;
616 uint64_t u64;
617 } val;
618 struct kvm_one_reg reg = {
619 .id = id,
620 .addr = (uintptr_t) &val,
622 int ret;
624 switch (id & KVM_REG_SIZE_MASK) {
625 case KVM_REG_SIZE_U32:
626 val.u32 = env->spr[spr];
627 break;
629 case KVM_REG_SIZE_U64:
630 val.u64 = env->spr[spr];
631 break;
633 default:
634 /* Don't handle this size yet */
635 abort();
638 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
639 if (ret != 0) {
640 trace_kvm_failed_spr_set(spr, strerror(errno));
644 static int kvm_put_fp(CPUState *cs)
646 PowerPCCPU *cpu = POWERPC_CPU(cs);
647 CPUPPCState *env = &cpu->env;
648 struct kvm_one_reg reg;
649 int i;
650 int ret;
652 if (env->insns_flags & PPC_FLOAT) {
653 uint64_t fpscr = env->fpscr;
654 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
656 reg.id = KVM_REG_PPC_FPSCR;
657 reg.addr = (uintptr_t)&fpscr;
658 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
659 if (ret < 0) {
660 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
661 return ret;
664 for (i = 0; i < 32; i++) {
665 uint64_t vsr[2];
667 #ifdef HOST_WORDS_BIGENDIAN
668 vsr[0] = float64_val(env->fpr[i]);
669 vsr[1] = env->vsr[i];
670 #else
671 vsr[0] = env->vsr[i];
672 vsr[1] = float64_val(env->fpr[i]);
673 #endif
674 reg.addr = (uintptr_t) &vsr;
675 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
677 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
678 if (ret < 0) {
679 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
680 i, strerror(errno));
681 return ret;
686 if (env->insns_flags & PPC_ALTIVEC) {
687 reg.id = KVM_REG_PPC_VSCR;
688 reg.addr = (uintptr_t)&env->vscr;
689 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
690 if (ret < 0) {
691 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
692 return ret;
695 for (i = 0; i < 32; i++) {
696 reg.id = KVM_REG_PPC_VR(i);
697 reg.addr = (uintptr_t)&env->avr[i];
698 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
699 if (ret < 0) {
700 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
701 return ret;
706 return 0;
709 static int kvm_get_fp(CPUState *cs)
711 PowerPCCPU *cpu = POWERPC_CPU(cs);
712 CPUPPCState *env = &cpu->env;
713 struct kvm_one_reg reg;
714 int i;
715 int ret;
717 if (env->insns_flags & PPC_FLOAT) {
718 uint64_t fpscr;
719 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
721 reg.id = KVM_REG_PPC_FPSCR;
722 reg.addr = (uintptr_t)&fpscr;
723 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
724 if (ret < 0) {
725 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
726 return ret;
727 } else {
728 env->fpscr = fpscr;
731 for (i = 0; i < 32; i++) {
732 uint64_t vsr[2];
734 reg.addr = (uintptr_t) &vsr;
735 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
737 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
738 if (ret < 0) {
739 DPRINTF("Unable to get %s%d from KVM: %s\n",
740 vsx ? "VSR" : "FPR", i, strerror(errno));
741 return ret;
742 } else {
743 #ifdef HOST_WORDS_BIGENDIAN
744 env->fpr[i] = vsr[0];
745 if (vsx) {
746 env->vsr[i] = vsr[1];
748 #else
749 env->fpr[i] = vsr[1];
750 if (vsx) {
751 env->vsr[i] = vsr[0];
753 #endif
758 if (env->insns_flags & PPC_ALTIVEC) {
759 reg.id = KVM_REG_PPC_VSCR;
760 reg.addr = (uintptr_t)&env->vscr;
761 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
762 if (ret < 0) {
763 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
764 return ret;
767 for (i = 0; i < 32; i++) {
768 reg.id = KVM_REG_PPC_VR(i);
769 reg.addr = (uintptr_t)&env->avr[i];
770 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
771 if (ret < 0) {
772 DPRINTF("Unable to get VR%d from KVM: %s\n",
773 i, strerror(errno));
774 return ret;
779 return 0;
782 #if defined(TARGET_PPC64)
783 static int kvm_get_vpa(CPUState *cs)
785 PowerPCCPU *cpu = POWERPC_CPU(cs);
786 CPUPPCState *env = &cpu->env;
787 struct kvm_one_reg reg;
788 int ret;
790 reg.id = KVM_REG_PPC_VPA_ADDR;
791 reg.addr = (uintptr_t)&env->vpa_addr;
792 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
793 if (ret < 0) {
794 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
795 return ret;
798 assert((uintptr_t)&env->slb_shadow_size
799 == ((uintptr_t)&env->slb_shadow_addr + 8));
800 reg.id = KVM_REG_PPC_VPA_SLB;
801 reg.addr = (uintptr_t)&env->slb_shadow_addr;
802 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
803 if (ret < 0) {
804 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
805 strerror(errno));
806 return ret;
809 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
810 reg.id = KVM_REG_PPC_VPA_DTL;
811 reg.addr = (uintptr_t)&env->dtl_addr;
812 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
813 if (ret < 0) {
814 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
815 strerror(errno));
816 return ret;
819 return 0;
822 static int kvm_put_vpa(CPUState *cs)
824 PowerPCCPU *cpu = POWERPC_CPU(cs);
825 CPUPPCState *env = &cpu->env;
826 struct kvm_one_reg reg;
827 int ret;
829 /* SLB shadow or DTL can't be registered unless a master VPA is
830 * registered. That means when restoring state, if a VPA *is*
831 * registered, we need to set that up first. If not, we need to
832 * deregister the others before deregistering the master VPA */
833 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
835 if (env->vpa_addr) {
836 reg.id = KVM_REG_PPC_VPA_ADDR;
837 reg.addr = (uintptr_t)&env->vpa_addr;
838 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
839 if (ret < 0) {
840 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
841 return ret;
845 assert((uintptr_t)&env->slb_shadow_size
846 == ((uintptr_t)&env->slb_shadow_addr + 8));
847 reg.id = KVM_REG_PPC_VPA_SLB;
848 reg.addr = (uintptr_t)&env->slb_shadow_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
850 if (ret < 0) {
851 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
852 return ret;
855 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
856 reg.id = KVM_REG_PPC_VPA_DTL;
857 reg.addr = (uintptr_t)&env->dtl_addr;
858 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
859 if (ret < 0) {
860 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
861 strerror(errno));
862 return ret;
865 if (!env->vpa_addr) {
866 reg.id = KVM_REG_PPC_VPA_ADDR;
867 reg.addr = (uintptr_t)&env->vpa_addr;
868 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
869 if (ret < 0) {
870 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
871 return ret;
875 return 0;
877 #endif /* TARGET_PPC64 */
879 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
881 CPUPPCState *env = &cpu->env;
882 struct kvm_sregs sregs;
883 int i;
885 sregs.pvr = env->spr[SPR_PVR];
887 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
889 /* Sync SLB */
890 #ifdef TARGET_PPC64
891 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
892 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
893 if (env->slb[i].esid & SLB_ESID_V) {
894 sregs.u.s.ppc64.slb[i].slbe |= i;
896 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
898 #endif
900 /* Sync SRs */
901 for (i = 0; i < 16; i++) {
902 sregs.u.s.ppc32.sr[i] = env->sr[i];
905 /* Sync BATs */
906 for (i = 0; i < 8; i++) {
907 /* Beware. We have to swap upper and lower bits here */
908 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
909 | env->DBAT[1][i];
910 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
911 | env->IBAT[1][i];
914 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
917 int kvm_arch_put_registers(CPUState *cs, int level)
919 PowerPCCPU *cpu = POWERPC_CPU(cs);
920 CPUPPCState *env = &cpu->env;
921 struct kvm_regs regs;
922 int ret;
923 int i;
925 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
926 if (ret < 0) {
927 return ret;
930 regs.ctr = env->ctr;
931 regs.lr = env->lr;
932 regs.xer = cpu_read_xer(env);
933 regs.msr = env->msr;
934 regs.pc = env->nip;
936 regs.srr0 = env->spr[SPR_SRR0];
937 regs.srr1 = env->spr[SPR_SRR1];
939 regs.sprg0 = env->spr[SPR_SPRG0];
940 regs.sprg1 = env->spr[SPR_SPRG1];
941 regs.sprg2 = env->spr[SPR_SPRG2];
942 regs.sprg3 = env->spr[SPR_SPRG3];
943 regs.sprg4 = env->spr[SPR_SPRG4];
944 regs.sprg5 = env->spr[SPR_SPRG5];
945 regs.sprg6 = env->spr[SPR_SPRG6];
946 regs.sprg7 = env->spr[SPR_SPRG7];
948 regs.pid = env->spr[SPR_BOOKE_PID];
950 for (i = 0;i < 32; i++)
951 regs.gpr[i] = env->gpr[i];
953 regs.cr = 0;
954 for (i = 0; i < 8; i++) {
955 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
958 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
959 if (ret < 0)
960 return ret;
962 kvm_put_fp(cs);
964 if (env->tlb_dirty) {
965 kvm_sw_tlb_put(cpu);
966 env->tlb_dirty = false;
969 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
970 ret = kvmppc_put_books_sregs(cpu);
971 if (ret < 0) {
972 return ret;
976 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
977 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
980 if (cap_one_reg) {
981 int i;
        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
987 for (i = 0; i < 1024; i++) {
988 uint64_t id = env->spr_cb[i].one_reg_id;
990 if (id != 0) {
991 kvm_put_one_spr(cs, id, i);
995 #ifdef TARGET_PPC64
996 if (msr_ts) {
997 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
998 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1000 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1001 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1003 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1004 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1005 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1006 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1007 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1008 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1009 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1010 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1011 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1012 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1015 if (cap_papr) {
1016 if (kvm_put_vpa(cs) < 0) {
1017 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1021 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1022 #endif /* TARGET_PPC64 */
1025 return ret;
1028 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1030 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1033 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1035 CPUPPCState *env = &cpu->env;
1036 struct kvm_sregs sregs;
1037 int ret;
1039 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1040 if (ret < 0) {
1041 return ret;
1044 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1045 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1046 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1047 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1048 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1049 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1050 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1051 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1052 env->spr[SPR_DECR] = sregs.u.e.dec;
1053 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1054 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1055 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1058 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1059 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1060 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1061 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1062 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1063 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1066 if (sregs.u.e.features & KVM_SREGS_E_64) {
1067 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1070 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1071 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1074 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1075 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1076 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1077 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1078 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1079 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1080 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1081 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1082 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1083 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1084 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1085 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1086 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1087 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1088 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1089 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1090 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1091 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1092 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1093 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1094 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1095 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1096 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1097 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1098 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1099 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1100 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1101 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1102 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1103 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1104 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1105 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1106 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1108 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1109 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1110 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1111 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1112 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1113 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1114 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1117 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1118 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1119 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1122 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1123 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1124 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1125 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1126 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1130 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1131 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1132 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1133 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1134 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1135 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1136 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1137 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1138 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1139 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1140 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1143 if (sregs.u.e.features & KVM_SREGS_EXP) {
1144 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1147 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1148 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1149 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1152 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1153 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1154 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1155 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1157 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1158 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1159 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1163 return 0;
1166 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1168 CPUPPCState *env = &cpu->env;
1169 struct kvm_sregs sregs;
1170 int ret;
1171 int i;
1173 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1174 if (ret < 0) {
1175 return ret;
1178 if (!env->external_htab) {
1179 ppc_store_sdr1(env, sregs.u.s.sdr1);
1182 /* Sync SLB */
1183 #ifdef TARGET_PPC64
1185 * The packed SLB array we get from KVM_GET_SREGS only contains
1186 * information about valid entries. So we flush our internal copy
1187 * to get rid of stale ones, then put all valid SLB entries back
1188 * in.
1190 memset(env->slb, 0, sizeof(env->slb));
1191 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1192 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1193 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1195 * Only restore valid entries
1197 if (rb & SLB_ESID_V) {
1198 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1201 #endif
1203 /* Sync SRs */
1204 for (i = 0; i < 16; i++) {
1205 env->sr[i] = sregs.u.s.ppc32.sr[i];
1208 /* Sync BATs */
1209 for (i = 0; i < 8; i++) {
1210 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1211 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1212 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1213 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1216 return 0;
1219 int kvm_arch_get_registers(CPUState *cs)
1221 PowerPCCPU *cpu = POWERPC_CPU(cs);
1222 CPUPPCState *env = &cpu->env;
1223 struct kvm_regs regs;
1224 uint32_t cr;
1225 int i, ret;
1227 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1228 if (ret < 0)
1229 return ret;
1231 cr = regs.cr;
1232 for (i = 7; i >= 0; i--) {
1233 env->crf[i] = cr & 15;
1234 cr >>= 4;
1237 env->ctr = regs.ctr;
1238 env->lr = regs.lr;
1239 cpu_write_xer(env, regs.xer);
1240 env->msr = regs.msr;
1241 env->nip = regs.pc;
1243 env->spr[SPR_SRR0] = regs.srr0;
1244 env->spr[SPR_SRR1] = regs.srr1;
1246 env->spr[SPR_SPRG0] = regs.sprg0;
1247 env->spr[SPR_SPRG1] = regs.sprg1;
1248 env->spr[SPR_SPRG2] = regs.sprg2;
1249 env->spr[SPR_SPRG3] = regs.sprg3;
1250 env->spr[SPR_SPRG4] = regs.sprg4;
1251 env->spr[SPR_SPRG5] = regs.sprg5;
1252 env->spr[SPR_SPRG6] = regs.sprg6;
1253 env->spr[SPR_SPRG7] = regs.sprg7;
1255 env->spr[SPR_BOOKE_PID] = regs.pid;
1257 for (i = 0;i < 32; i++)
1258 env->gpr[i] = regs.gpr[i];
1260 kvm_get_fp(cs);
1262 if (cap_booke_sregs) {
1263 ret = kvmppc_get_booke_sregs(cpu);
1264 if (ret < 0) {
1265 return ret;
1269 if (cap_segstate) {
1270 ret = kvmppc_get_books_sregs(cpu);
1271 if (ret < 0) {
1272 return ret;
1276 if (cap_hior) {
1277 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1280 if (cap_one_reg) {
1281 int i;
        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
1287 for (i = 0; i < 1024; i++) {
1288 uint64_t id = env->spr_cb[i].one_reg_id;
1290 if (id != 0) {
1291 kvm_get_one_spr(cs, id, i);
1295 #ifdef TARGET_PPC64
1296 if (msr_ts) {
1297 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1298 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1300 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1301 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1303 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1304 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1305 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1306 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1307 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1308 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1309 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1310 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1311 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1312 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1315 if (cap_papr) {
1316 if (kvm_get_vpa(cs) < 0) {
1317 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1321 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1322 #endif
1325 return 0;
1328 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1330 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1332 if (irq != PPC_INTERRUPT_EXT) {
1333 return 0;
1336 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1337 return 0;
1340 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1342 return 0;
1345 #if defined(TARGET_PPCEMB)
1346 #define PPC_INPUT_INT PPC40x_INPUT_INT
1347 #elif defined(TARGET_PPC64)
1348 #define PPC_INPUT_INT PPC970_INPUT_INT
1349 #else
1350 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1351 #endif
1353 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1355 PowerPCCPU *cpu = POWERPC_CPU(cs);
1356 CPUPPCState *env = &cpu->env;
1357 int r;
1358 unsigned irq;
1360 qemu_mutex_lock_iothread();
1362 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1363 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1364 if (!cap_interrupt_level &&
1365 run->ready_for_interrupt_injection &&
1366 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1367 (env->irq_input_state & (1<<PPC_INPUT_INT)))
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
1376 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1377 if (r < 0) {
1378 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1381 /* Always wake up soon in case the interrupt was level based */
1382 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1383 (NANOSECONDS_PER_SECOND / 50));
    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyway, so we will get a chance to deliver the rest. */
1390 qemu_mutex_unlock_iothread();
1393 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1395 return MEMTXATTRS_UNSPECIFIED;
1398 int kvm_arch_process_async_events(CPUState *cs)
1400 return cs->halted;
1403 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1405 CPUState *cs = CPU(cpu);
1406 CPUPPCState *env = &cpu->env;
1408 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1409 cs->halted = 1;
1410 cs->exception_index = EXCP_HLT;
1413 return 0;
1416 /* map dcr access to existing qemu dcr emulation */
1417 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1419 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1420 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1422 return 0;
1425 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1427 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1428 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1430 return 0;
1433 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1435 /* Mixed endian case is not handled */
1436 uint32_t sc = debug_inst_opcode;
1438 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1439 sizeof(sc), 0) ||
1440 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1441 return -EINVAL;
1444 return 0;
1447 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1449 uint32_t sc;
1451 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1452 sc != debug_inst_opcode ||
1453 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1454 sizeof(sc), 1)) {
1455 return -EINVAL;
1458 return 0;
1461 static int find_hw_breakpoint(target_ulong addr, int type)
1463 int n;
1465 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1466 <= ARRAY_SIZE(hw_debug_points));
1468 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1469 if (hw_debug_points[n].addr == addr &&
1470 hw_debug_points[n].type == type) {
1471 return n;
1475 return -1;
1478 static int find_hw_watchpoint(target_ulong addr, int *flag)
1480 int n;
1482 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1483 if (n >= 0) {
1484 *flag = BP_MEM_ACCESS;
1485 return n;
1488 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1489 if (n >= 0) {
1490 *flag = BP_MEM_WRITE;
1491 return n;
1494 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1495 if (n >= 0) {
1496 *flag = BP_MEM_READ;
1497 return n;
1500 return -1;
1503 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1504 target_ulong len, int type)
1506 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1507 return -ENOBUFS;
1510 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1511 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1513 switch (type) {
1514 case GDB_BREAKPOINT_HW:
1515 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1516 return -ENOBUFS;
1519 if (find_hw_breakpoint(addr, type) >= 0) {
1520 return -EEXIST;
1523 nb_hw_breakpoint++;
1524 break;
1526 case GDB_WATCHPOINT_WRITE:
1527 case GDB_WATCHPOINT_READ:
1528 case GDB_WATCHPOINT_ACCESS:
1529 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1530 return -ENOBUFS;
1533 if (find_hw_breakpoint(addr, type) >= 0) {
1534 return -EEXIST;
1537 nb_hw_watchpoint++;
1538 break;
1540 default:
1541 return -ENOSYS;
1544 return 0;
1547 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1548 target_ulong len, int type)
1550 int n;
1552 n = find_hw_breakpoint(addr, type);
1553 if (n < 0) {
1554 return -ENOENT;
1557 switch (type) {
1558 case GDB_BREAKPOINT_HW:
1559 nb_hw_breakpoint--;
1560 break;
1562 case GDB_WATCHPOINT_WRITE:
1563 case GDB_WATCHPOINT_READ:
1564 case GDB_WATCHPOINT_ACCESS:
1565 nb_hw_watchpoint--;
1566 break;
1568 default:
1569 return -ENOSYS;
1571 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1573 return 0;
1576 void kvm_arch_remove_all_hw_breakpoints(void)
1578 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1581 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1583 int n;
1585 /* Software Breakpoint updates */
1586 if (kvm_sw_breakpoints_active(cs)) {
1587 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1590 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1591 <= ARRAY_SIZE(hw_debug_points));
1592 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1594 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1595 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1596 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1597 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1598 switch (hw_debug_points[n].type) {
1599 case GDB_BREAKPOINT_HW:
1600 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1601 break;
1602 case GDB_WATCHPOINT_WRITE:
1603 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1604 break;
1605 case GDB_WATCHPOINT_READ:
1606 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1607 break;
1608 case GDB_WATCHPOINT_ACCESS:
1609 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1610 KVMPPC_DEBUG_WATCH_READ;
1611 break;
1612 default:
1613 cpu_abort(cs, "Unsupported breakpoint type\n");
1615 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1620 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1622 CPUState *cs = CPU(cpu);
1623 CPUPPCState *env = &cpu->env;
1624 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1625 int handle = 0;
1626 int n;
1627 int flag = 0;
1629 if (cs->singlestep_enabled) {
1630 handle = 1;
1631 } else if (arch_info->status) {
1632 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1633 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1634 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1635 if (n >= 0) {
1636 handle = 1;
1638 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1639 KVMPPC_DEBUG_WATCH_WRITE)) {
1640 n = find_hw_watchpoint(arch_info->address, &flag);
1641 if (n >= 0) {
1642 handle = 1;
1643 cs->watchpoint_hit = &hw_watchpoint;
1644 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1645 hw_watchpoint.flags = flag;
1649 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1650 handle = 1;
    } else {
        /* QEMU is not able to handle this debug exception, so inject a
         * program exception into the guest;
         * Yes, a program exception, NOT a debug exception !!
         * When QEMU uses the debug resources it must keep the debug
         * exception permanently enabled, i.e. MSR_DE is set and MSRP_DEP
         * is set so the guest cannot clear MSR_DE.
         * When emulating the debug resources for the guest we instead want
         * the guest to control MSR_DE (enable/disable the debug interrupt
         * as needed).
         * Supporting both configurations at once is not possible, so debug
         * resources cannot be shared between QEMU and the guest on BookE.
         * In the current design QEMU gets priority over the guest: if QEMU
         * is using the debug resources then the guest cannot use them.
         * For software breakpoints QEMU uses a privileged instruction, so
         * the only way we can get here without a QEMU-owned debug event is
         * that the guest executed a privileged / illegal instruction, which
         * is why we inject a program interrupt.
         */
        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }
1683 return handle;
1686 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1688 PowerPCCPU *cpu = POWERPC_CPU(cs);
1689 CPUPPCState *env = &cpu->env;
1690 int ret;
1692 qemu_mutex_lock_iothread();
1694 switch (run->exit_reason) {
1695 case KVM_EXIT_DCR:
1696 if (run->dcr.is_write) {
1697 DPRINTF("handle dcr write\n");
1698 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1699 } else {
1700 DPRINTF("handle dcr read\n");
1701 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1703 break;
1704 case KVM_EXIT_HLT:
1705 DPRINTF("handle halt\n");
1706 ret = kvmppc_handle_halt(cpu);
1707 break;
1708 #if defined(TARGET_PPC64)
1709 case KVM_EXIT_PAPR_HCALL:
1710 DPRINTF("handle PAPR hypercall\n");
1711 run->papr_hcall.ret = spapr_hypercall(cpu,
1712 run->papr_hcall.nr,
1713 run->papr_hcall.args);
1714 ret = 0;
1715 break;
1716 #endif
1717 case KVM_EXIT_EPR:
1718 DPRINTF("handle epr\n");
1719 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1720 ret = 0;
1721 break;
1722 case KVM_EXIT_WATCHDOG:
1723 DPRINTF("handle watchdog expiry\n");
1724 watchdog_perform_action();
1725 ret = 0;
1726 break;
1728 case KVM_EXIT_DEBUG:
1729 DPRINTF("handle debug exception\n");
1730 if (kvm_handle_debug(cpu, run)) {
1731 ret = EXCP_DEBUG;
1732 break;
1734 /* re-enter, this exception was guest-internal */
1735 ret = 0;
1736 break;
1738 default:
1739 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1740 ret = -1;
1741 break;
1744 qemu_mutex_unlock_iothread();
1745 return ret;
1748 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1750 CPUState *cs = CPU(cpu);
1751 uint32_t bits = tsr_bits;
1752 struct kvm_one_reg reg = {
1753 .id = KVM_REG_PPC_OR_TSR,
1754 .addr = (uintptr_t) &bits,
1757 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1760 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1763 CPUState *cs = CPU(cpu);
1764 uint32_t bits = tsr_bits;
1765 struct kvm_one_reg reg = {
1766 .id = KVM_REG_PPC_CLEAR_TSR,
1767 .addr = (uintptr_t) &bits,
1770 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1773 int kvmppc_set_tcr(PowerPCCPU *cpu)
1775 CPUState *cs = CPU(cpu);
1776 CPUPPCState *env = &cpu->env;
1777 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1779 struct kvm_one_reg reg = {
1780 .id = KVM_REG_PPC_TCR,
1781 .addr = (uintptr_t) &tcr,
1784 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1787 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1789 CPUState *cs = CPU(cpu);
1790 int ret;
1792 if (!kvm_enabled()) {
1793 return -1;
    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog\n");
        return -1;
    }
1801 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1802 if (ret < 0) {
1803 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1804 __func__, strerror(-ret));
1805 return ret;
1808 return ret;
1811 static int read_cpuinfo(const char *field, char *value, int len)
1813 FILE *f;
1814 int ret = -1;
1815 int field_len = strlen(field);
1816 char line[512];
1818 f = fopen("/proc/cpuinfo", "r");
1819 if (!f) {
1820 return -1;
1823 do {
1824 if (!fgets(line, sizeof(line), f)) {
1825 break;
1827 if (!strncmp(line, field, field_len)) {
1828 pstrcpy(value, len, line);
1829 ret = 0;
1830 break;
1832 } while(*line);
1834 fclose(f);
1836 return ret;
1839 uint32_t kvmppc_get_tbfreq(void)
1841 char line[512];
1842 char *ns;
1843 uint32_t retval = NANOSECONDS_PER_SECOND;
1845 if (read_cpuinfo("timebase", line, sizeof(line))) {
1846 return retval;
1849 if (!(ns = strchr(line, ':'))) {
1850 return retval;
1853 ns++;
1855 return atoi(ns);
1858 bool kvmppc_get_host_serial(char **value)
1860 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1861 NULL);
1864 bool kvmppc_get_host_model(char **value)
1866 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1869 /* Try to find a device tree node for a CPU with clock-frequency property */
1870 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1872 struct dirent *dirp;
1873 DIR *dp;
1875 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1876 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1877 return -1;
1880 buf[0] = '\0';
1881 while ((dirp = readdir(dp)) != NULL) {
1882 FILE *f;
1883 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1884 dirp->d_name);
1885 f = fopen(buf, "r");
1886 if (f) {
1887 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1888 fclose(f);
1889 break;
1891 buf[0] = '\0';
1893 closedir(dp);
1894 if (buf[0] == '\0') {
1895 printf("Unknown host!\n");
1896 return -1;
1899 return 0;
1902 static uint64_t kvmppc_read_int_dt(const char *filename)
1904 union {
1905 uint32_t v32;
1906 uint64_t v64;
1907 } u;
1908 FILE *f;
1909 int len;
1911 f = fopen(filename, "rb");
1912 if (!f) {
1913 return -1;
1916 len = fread(&u, 1, sizeof(u), f);
1917 fclose(f);
1918 switch (len) {
1919 case 4:
1920 /* property is a 32-bit quantity */
1921 return be32_to_cpu(u.v32);
1922 case 8:
1923 return be64_to_cpu(u.v64);
1926 return 0;
1929 /* Read a CPU node property from the host device tree that's a single
1930 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1931 * (can't find or open the property, or doesn't understand the
1932 * format) */
1933 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1935 char buf[PATH_MAX], *tmp;
1936 uint64_t val;
1938 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1939 return -1;
1942 tmp = g_strdup_printf("%s/%s", buf, propname);
1943 val = kvmppc_read_int_dt(tmp);
1944 g_free(tmp);
1946 return val;
1949 uint64_t kvmppc_get_clockfreq(void)
1951 return kvmppc_read_int_cpu_dt("clock-frequency");
1954 uint32_t kvmppc_get_vmx(void)
1956 return kvmppc_read_int_cpu_dt("ibm,vmx");
1959 uint32_t kvmppc_get_dfp(void)
1961 return kvmppc_read_int_cpu_dt("ibm,dfp");
1964 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1966 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1967 CPUState *cs = CPU(cpu);
1969 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1970 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1971 return 0;
1974 return 1;
1977 int kvmppc_get_hasidle(CPUPPCState *env)
1979 struct kvm_ppc_pvinfo pvinfo;
1981 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1982 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1983 return 1;
1986 return 0;
1989 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1991 uint32_t *hc = (uint32_t*)buf;
1992 struct kvm_ppc_pvinfo pvinfo;
1994 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1995 memcpy(buf, pvinfo.hcall, buf_len);
1996 return 0;
    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */
2008 hc[0] = cpu_to_be32(0x08000048);
2009 hc[1] = cpu_to_be32(0x3860ffff);
2010 hc[2] = cpu_to_be32(0x48000008);
2011 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2013 return 1;
2016 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2018 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2021 void kvmppc_enable_logical_ci_hcalls(void)
2024 * FIXME: it would be nice if we could detect the cases where
2025 * we're using a device which requires the in kernel
2026 * implementation of these hcalls, but the kernel lacks them and
2027 * produce a warning.
2029 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2030 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2033 void kvmppc_enable_set_mode_hcall(void)
2035 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2038 void kvmppc_set_papr(PowerPCCPU *cpu)
2040 CPUState *cs = CPU(cpu);
2041 int ret;
2043 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2044 if (ret) {
2045 error_report("This vCPU type or KVM version does not support PAPR");
2046 exit(1);
2049 /* Update the capability flag so we sync the right information
2050 * with kvm */
2051 cap_papr = 1;
2054 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2056 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2059 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2061 CPUState *cs = CPU(cpu);
2062 int ret;
2064 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2065 if (ret && mpic_proxy) {
2066 error_report("This KVM version does not support EPR");
2067 exit(1);
2071 int kvmppc_smt_threads(void)
2073 return cap_ppc_smt ? cap_ppc_smt : 1;
2076 #ifdef TARGET_PPC64
2077 off_t kvmppc_alloc_rma(void **rma)
2079 off_t size;
2080 int fd;
2081 struct kvm_allocate_rma ret;
    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     * not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
2092 return 0;
2095 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2096 if (fd < 0) {
2097 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2098 strerror(errno));
2099 return -1;
2102 size = MIN(ret.rma_size, 256ul << 20);
2104 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2105 if (*rma == MAP_FAILED) {
2106 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2107 return -1;
2110 return size;
2113 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2115 struct kvm_ppc_smmu_info info;
2116 long rampagesize, best_page_shift;
2117 int i;
2119 if (cap_ppc_rma >= 2) {
2120 return current_size;
2123 /* Find the largest hardware supported page size that's less than
2124 * or equal to the (logical) backing page size of guest RAM */
2125 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2126 rampagesize = getrampagesize();
2127 best_page_shift = 0;
2129 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2130 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2132 if (!sps->page_shift) {
2133 continue;
2136 if ((sps->page_shift > best_page_shift)
2137 && ((1UL << sps->page_shift) <= rampagesize)) {
2138 best_page_shift = sps->page_shift;
2142 return MIN(current_size,
2143 1ULL << (best_page_shift + hash_shift - 7));
2145 #endif
2147 bool kvmppc_spapr_use_multitce(void)
2149 return cap_spapr_multitce;
2152 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2153 bool need_vfio)
2155 struct kvm_create_spapr_tce args = {
2156 .liobn = liobn,
2157 .window_size = window_size,
2159 long len;
2160 int fd;
2161 void *table;
    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
2167 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2168 return NULL;
2171 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2172 if (fd < 0) {
2173 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2174 liobn);
2175 return NULL;
2178 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2179 /* FIXME: round this up to page size */
2181 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2182 if (table == MAP_FAILED) {
2183 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2184 liobn);
2185 close(fd);
2186 return NULL;
2189 *pfd = fd;
2190 return table;
2193 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2195 long len;
2197 if (fd < 0) {
2198 return -1;
2201 len = nb_table * sizeof(uint64_t);
2202 if ((munmap(table, len) < 0) ||
2203 (close(fd) < 0)) {
2204 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2205 strerror(errno));
2206 /* Leak the table */
2207 }
2209 return 0;
2210 }
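/*
 * Editor's sketch (not part of the original file): one way the two
 * helpers above could be paired by a caller.  The LIOBN, the 256MiB
 * window and the function name below are made-up example values.
 */
static void spapr_tce_example(void)
{
    uint32_t liobn = 0x80000001;                  /* hypothetical LIOBN */
    uint32_t window_size = 256 * 1024 * 1024;     /* 256MiB DMA window */
    uint32_t nb_table = window_size / SPAPR_TCE_PAGE_SIZE;
    int fd;
    void *table;

    /* Try to get an in-kernel TCE table backing this window */
    table = kvmppc_create_spapr_tce(liobn, window_size, &fd, false);
    if (!table) {
        return;        /* caller would fall back to a userspace table */
    }

    /* ... guest runs; H_PUT_TCE updates the mmap'd table ... */

    /* Tear down: munmap()s the table and closes the KVM fd */
    kvmppc_remove_spapr_tce(table, fd, nb_table);
}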
2212 int kvmppc_reset_htab(int shift_hint)
2214 uint32_t shift = shift_hint;
2216 if (!kvm_enabled()) {
2217 /* Full emulation, tell caller to allocate htab itself */
2218 return 0;
2220 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2221 int ret;
2222 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2223 if (ret == -ENOTTY) {
2224 /* At least some versions of PR KVM advertise the
2225 * capability, but don't implement the ioctl(). Oops.
2226 * Return 0 so that we allocate the htab in qemu, as is
2227 * correct for PR. */
2228 return 0;
2229 } else if (ret < 0) {
2230 return ret;
2232 return shift;
2235 /* We have a kernel that predates the htab reset calls.  For PR
2236 * KVM, we need to allocate the htab ourselves; an HV KVM of
2237 * this era will already have allocated a fixed 16MB hash
2238 * table.  Kernels of this era have the GET_PVINFO capability
2239 * only on PR, so we use that as a hack to determine the right
2240 * answer. */
2241 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2242 /* PR - tell caller to allocate htab */
2243 return 0;
2244 } else {
2245 /* HV - assume 16MB kernel allocated htab */
2246 return 24;
2247 }
2248 }
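/*
 * Editor's sketch (not part of the original file): how a caller might
 * act on kvmppc_reset_htab()'s return value.  The function name and the
 * shift hint of 24 (a 16MB hash table) are hypothetical.
 */
static void spapr_htab_setup_example(void)
{
    int shift = kvmppc_reset_htab(24);

    if (shift > 0) {
        /* The kernel owns a hash table of 2^shift bytes (HV KVM or a
         * kernel with KVM_PPC_ALLOCATE_HTAB); QEMU must not allocate
         * its own copy. */
    } else if (shift == 0) {
        /* PR KVM, full emulation, or a pre-ALLOCATE_HTAB kernel:
         * allocate the hash table in QEMU. */
    } else {
        /* shift < 0: the KVM_PPC_ALLOCATE_HTAB ioctl failed; the error
         * would be propagated to the caller. */
    }
}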
2250 static inline uint32_t mfpvr(void)
2252 uint32_t pvr;
2254 asm ("mfpvr %0"
2255 : "=r"(pvr));
2256 return pvr;
2259 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2261 if (on) {
2262 *word |= flags;
2263 } else {
2264 *word &= ~flags;
2268 static void kvmppc_host_cpu_initfn(Object *obj)
2270 assert(kvm_enabled());
2273 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2275 DeviceClass *dc = DEVICE_CLASS(oc);
2276 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2277 uint32_t vmx = kvmppc_get_vmx();
2278 uint32_t dfp = kvmppc_get_dfp();
2279 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2280 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2282 /* Now fix up the class with information we can query from the host */
2283 pcc->pvr = mfpvr();
2285 if (vmx != -1) {
2286 /* Only override when we know what the host supports */
2287 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2288 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2290 if (dfp != -1) {
2291 /* Only override when we know what the host supports */
2292 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2295 if (dcache_size != -1) {
2296 pcc->l1_dcache_size = dcache_size;
2299 if (icache_size != -1) {
2300 pcc->l1_icache_size = icache_size;
2303 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2304 dc->cannot_destroy_with_object_finalize_yet = true;
2307 bool kvmppc_has_cap_epr(void)
2309 return cap_epr;
2312 bool kvmppc_has_cap_htab_fd(void)
2314 return cap_htab_fd;
2317 bool kvmppc_has_cap_fixup_hcalls(void)
2319 return cap_fixup_hcalls;
2322 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2324 ObjectClass *oc = OBJECT_CLASS(pcc);
2326 while (oc && !object_class_is_abstract(oc)) {
2327 oc = object_class_get_parent(oc);
2329 assert(oc);
2331 return POWERPC_CPU_CLASS(oc);
2334 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2336 uint32_t host_pvr = mfpvr();
2337 PowerPCCPUClass *pvr_pcc;
2339 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2340 if (pvr_pcc == NULL) {
2341 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2344 return pvr_pcc;
2347 #if defined(TARGET_PPC64)
2348 static void spapr_cpu_core_host_initfn(Object *obj)
2350 sPAPRCPUCore *core = SPAPR_CPU_CORE(obj);
2351 char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host");
2352 ObjectClass *oc = object_class_by_name(name);
2354 g_assert(oc);
2355 g_free((void *)name);
2356 core->cpu_class = oc;
2358 #endif
2360 static int kvm_ppc_register_host_cpu_type(void)
2362 TypeInfo type_info = {
2363 .name = TYPE_HOST_POWERPC_CPU,
2364 .instance_init = kvmppc_host_cpu_initfn,
2365 .class_init = kvmppc_host_cpu_class_init,
2366 };
2367 PowerPCCPUClass *pvr_pcc;
2368 DeviceClass *dc;
2370 pvr_pcc = kvm_ppc_get_host_cpu_class();
2371 if (pvr_pcc == NULL) {
2372 return -1;
2374 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2375 type_register(&type_info);
2377 #if defined(TARGET_PPC64)
2378 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2379 type_info.parent = TYPE_SPAPR_CPU_CORE;
2380 type_info.instance_size = sizeof(sPAPRCPUCore);
2381 type_info.instance_init = spapr_cpu_core_host_initfn;
2382 type_info.class_init = NULL;
2383 type_register(&type_info);
2384 g_free((void *)type_info.name);
2385 type_info.instance_size = 0;
2386 type_info.instance_init = NULL;
2387 #endif
2389 /* Register a generic CPU class for this CPU family */
2390 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2391 dc = DEVICE_CLASS(pvr_pcc);
2392 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2393 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2394 type_register(&type_info);
2396 return 0;
2399 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2401 struct kvm_rtas_token_args args = {
2402 .token = token,
2403 };
2405 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2406 return -ENOENT;
2409 strncpy(args.name, function, sizeof(args.name));
2411 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2414 int kvmppc_get_htab_fd(bool write)
2416 struct kvm_get_htab_fd s = {
2417 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2418 .start_index = 0,
2419 };
2421 if (!cap_htab_fd) {
2422 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2423 return -1;
2426 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2429 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2431 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2432 uint8_t buf[bufsize];
2433 ssize_t rc;
2435 do {
2436 rc = read(fd, buf, bufsize);
2437 if (rc < 0) {
2438 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2439 strerror(errno));
2440 return rc;
2441 } else if (rc) {
2442 uint8_t *buffer = buf;
2443 ssize_t n = rc;
2444 while (n) {
2445 struct kvm_get_htab_header *head =
2446 (struct kvm_get_htab_header *) buffer;
2447 size_t chunksize = sizeof(*head) +
2448 HASH_PTE_SIZE_64 * head->n_valid;
2450 qemu_put_be32(f, head->index);
2451 qemu_put_be16(f, head->n_valid);
2452 qemu_put_be16(f, head->n_invalid);
2453 qemu_put_buffer(f, (void *)(head + 1),
2454 HASH_PTE_SIZE_64 * head->n_valid);
2456 buffer += chunksize;
2457 n -= chunksize;
2460 } while ((rc != 0)
2461 && ((max_ns < 0)
2462 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2464 return (rc == 0) ? 1 : 0;
2465 }
2467 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2468 uint16_t n_valid, uint16_t n_invalid)
2470 struct kvm_get_htab_header *buf;
2471 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2472 ssize_t rc;
2474 buf = alloca(chunksize);
2475 buf->index = index;
2476 buf->n_valid = n_valid;
2477 buf->n_invalid = n_invalid;
2479 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2481 rc = write(fd, buf, chunksize);
2482 if (rc < 0) {
2483 fprintf(stderr, "Error writing KVM hash table: %s\n",
2484 strerror(errno));
2485 return rc;
2487 if (rc != chunksize) {
2488 /* We should never get a short write on a single chunk */
2489 fprintf(stderr, "Short write, restoring KVM hash table\n");
2490 return -1;
2492 return 0;
2493 }
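/*
 * Editor's note (not from the original source): each record that
 * kvmppc_save_htab() writes into the migration stream is a big-endian
 * copy of the struct kvm_get_htab_header fields (be32 index, be16
 * n_valid, be16 n_invalid) followed by n_valid * HASH_PTE_SIZE_64 bytes
 * of raw HPTE data, so a record with n_valid = 2 and n_invalid = 6
 * occupies 8 + 2 * 16 = 40 bytes on the wire.  The sketch below shows
 * how a destination-side caller could read one record back; the
 * function name is hypothetical.
 */
static int kvmppc_load_htab_record_example(QEMUFile *f, int fd)
{
    uint32_t index = qemu_get_be32(f);
    uint16_t n_valid = qemu_get_be16(f);
    uint16_t n_invalid = qemu_get_be16(f);

    /* Pulls the HPTE payload out of the stream and writes it, together
     * with the reconstructed header, to the KVM HTAB fd. */
    return kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
}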
2495 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2497 return true;
2500 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2502 return 1;
2505 int kvm_arch_on_sigbus(int code, void *addr)
2507 return 1;
2510 void kvm_arch_init_irq_routing(KVMState *s)
2511 {
2512 }
2514 struct kvm_get_htab_buf {
2515 struct kvm_get_htab_header header;
2516 /*
2517 * We require one extra entry for the read
2518 */
2519 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2520 };
2522 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2524 int htab_fd;
2525 struct kvm_get_htab_fd ghf;
2526 struct kvm_get_htab_buf *hpte_buf;
2528 ghf.flags = 0;
2529 ghf.start_index = pte_index;
2530 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2531 if (htab_fd < 0) {
2532 goto error_out;
2535 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2536 /*
2537 * Read the hpte group
2538 */
2539 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2540 goto out_close;
2543 close(htab_fd);
2544 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2546 out_close:
2547 g_free(hpte_buf);
2548 close(htab_fd);
2549 error_out:
2550 return 0;
2553 void kvmppc_hash64_free_pteg(uint64_t token)
2555 struct kvm_get_htab_buf *htab_buf;
2557 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2558 hpte);
2559 g_free(htab_buf);
2560 return;
2561 }
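/*
 * Editor's sketch (not part of the original file): the token returned by
 * kvmppc_hash64_read_pteg() above is simply the address of the hpte[]
 * array inside a heap-allocated kvm_get_htab_buf, so the caller can walk
 * it as pte0/pte1 pairs and must hand it back to
 * kvmppc_hash64_free_pteg() when done.  The function name below is
 * hypothetical.
 */
static void hash64_pteg_walk_example(PowerPCCPU *cpu, target_ulong pteg_index)
{
    uint64_t token = kvmppc_hash64_read_pteg(cpu, pteg_index);
    target_ulong *hptes;
    int i;

    if (!token) {
        return;     /* reading the PTEG through the HTAB fd failed */
    }

    hptes = (target_ulong *)(uintptr_t)token;
    for (i = 0; i < HPTES_PER_GROUP; i++) {
        target_ulong pte0 = hptes[i * 2];        /* first doubleword */
        target_ulong pte1 = hptes[i * 2 + 1];    /* second doubleword */

        /* ... compare pte0/pte1 against the VPN being translated ... */
        (void)pte0;
        (void)pte1;
    }

    kvmppc_hash64_free_pteg(token);
}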
2563 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2564 target_ulong pte0, target_ulong pte1)
2566 int htab_fd;
2567 struct kvm_get_htab_fd ghf;
2568 struct kvm_get_htab_buf hpte_buf;
2570 ghf.flags = 0;
2571 ghf.start_index = 0; /* Ignored */
2572 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2573 if (htab_fd < 0) {
2574 goto error_out;
2577 hpte_buf.header.n_valid = 1;
2578 hpte_buf.header.n_invalid = 0;
2579 hpte_buf.header.index = pte_index;
2580 hpte_buf.hpte[0] = pte0;
2581 hpte_buf.hpte[1] = pte1;
2582 /*
2583 * Write the hpte entry.
2584 * CAUTION: write() has the warn_unused_result attribute. Hence we
2585 * need to check the return value, even though we do nothing.
2586 */
2587 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2588 goto out_close;
2591 out_close:
2592 close(htab_fd);
2593 return;
2595 error_out:
2596 return;
2599 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2600 uint64_t address, uint32_t data, PCIDevice *dev)
2602 return 0;
2605 int kvm_arch_msi_data_to_gsi(uint32_t data)
2607 return data & 0xffff;
2610 int kvmppc_enable_hwrng(void)
2612 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2613 return -1;
2616 return kvmppc_enable_hcall(kvm_state, H_RANDOM);