target/ppc: Improve accuracy of guest HTM availability on P8s
qemu/ar7.git: target/ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #if defined(TARGET_PPC64)
50 #include "hw/ppc/spapr_cpu_core.h"
51 #endif
52 #include "elf.h"
54 //#define DEBUG_KVM
56 #ifdef DEBUG_KVM
57 #define DPRINTF(fmt, ...) \
58 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
59 #else
60 #define DPRINTF(fmt, ...) \
61 do { } while (0)
62 #endif
64 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
66 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
67 KVM_CAP_LAST_INFO
70 static int cap_interrupt_unset = false;
71 static int cap_interrupt_level = false;
72 static int cap_segstate;
73 static int cap_booke_sregs;
74 static int cap_ppc_smt;
75 static int cap_ppc_rma;
76 static int cap_spapr_tce;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
88 static uint32_t debug_inst_opcode;
90 /* XXX We have a race condition where we actually have a level triggered
91 * interrupt, but the infrastructure can't expose that yet, so the guest
92 * takes but ignores it, goes to sleep and never gets notified that there's
93 * still an interrupt pending.
95 * As a quick workaround, let's just wake up again 20 ms after we injected
96 * an interrupt. That way we can ensure that we're always reinjecting
97 * interrupts in case the guest swallowed them.
99 static QEMUTimer *idle_timer;
101 static void kvm_kick_cpu(void *opaque)
103 PowerPCCPU *cpu = opaque;
105 qemu_cpu_kick(CPU(cpu));
108 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
109 * should only be used for fallback tests - generally we should use
110 * explicit capabilities for the features we want, rather than
111 * assuming what is/isn't available depending on the KVM variant. */
112 static bool kvmppc_is_pr(KVMState *ks)
114 /* Assume KVM-PR if the GET_PVINFO capability is available */
115 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
118 static int kvm_ppc_register_host_cpu_type(void);
120 int kvm_arch_init(MachineState *ms, KVMState *s)
122 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
123 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
124 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
125 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
126 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
127 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
128 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
129 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
130 cap_spapr_vfio = false;
131 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
132 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
133 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
134 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
135 /* Note: we don't set cap_papr here, because this capability is
136 * only activated after this by kvmppc_set_papr() */
137 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
138 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
139 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
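/* Note: KVM_CAP_PPC_HTM is queried on the VM here. On POWER8 hosts running
 * KVM-HV the capability may be absent even though HTM works, in which case
 * cap_htm is force-enabled later in kvm_arch_init_vcpu(). */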
141 if (!cap_interrupt_level) {
142 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
143 "VM to stall at times!\n");
146 kvm_ppc_register_host_cpu_type();
148 return 0;
151 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
153 return 0;
156 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
158 CPUPPCState *cenv = &cpu->env;
159 CPUState *cs = CPU(cpu);
160 struct kvm_sregs sregs;
161 int ret;
163 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
164 /* What we're really trying to say is "if we're on BookE, we use
165 the native PVR for now". This is the only sane way to check
166 it though, so we potentially confuse users into thinking they can
167 run BookE guests on BookS. Let's hope nobody dares enough :) */
168 return 0;
169 } else {
170 if (!cap_segstate) {
171 fprintf(stderr, "kvm error: missing PVR setting capability\n");
172 return -ENOSYS;
176 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
177 if (ret) {
178 return ret;
181 sregs.pvr = cenv->spr[SPR_PVR];
182 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
185 /* Set up a shared TLB array with KVM */
186 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
188 CPUPPCState *env = &cpu->env;
189 CPUState *cs = CPU(cpu);
190 struct kvm_book3e_206_tlb_params params = {};
191 struct kvm_config_tlb cfg = {};
192 unsigned int entries = 0;
193 int ret, i;
195 if (!kvm_enabled() ||
196 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
197 return 0;
200 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
202 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
203 params.tlb_sizes[i] = booke206_tlb_size(env, i);
204 params.tlb_ways[i] = booke206_tlb_ways(env, i);
205 entries += params.tlb_sizes[i];
208 assert(entries == env->nb_tlb);
209 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
211 env->tlb_dirty = true;
213 cfg.array = (uintptr_t)env->tlb.tlbm;
214 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
215 cfg.params = (uintptr_t)&params;
216 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
218 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
219 if (ret < 0) {
220 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
221 __func__, strerror(-ret));
222 return ret;
225 env->kvm_sw_tlb = true;
226 return 0;
230 #if defined(TARGET_PPC64)
231 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
232 struct kvm_ppc_smmu_info *info)
234 CPUPPCState *env = &cpu->env;
235 CPUState *cs = CPU(cpu);
237 memset(info, 0, sizeof(*info));
239 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
240 * need to "guess" what the supported page sizes are.
242 * For that to work we make a few assumptions:
244 * - Check whether we are running "PR" KVM which only supports 4K
245 * and 16M pages, but supports them regardless of the backing
246 * store characteristics. We also don't support 1T segments.
248 * This is safe as if HV KVM ever supports that capability or PR
249 * KVM grows support for more page/segment sizes, those versions
250 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
251 * will not hit this fallback
253 * - Else we are running HV KVM. This means we only support page
254 * sizes that fit in the backing store. Additionally we only
255 * advertise 64K pages if the processor is ARCH 2.06 and we assume
256 * P7 encodings for the SLB and hash table. Here too, we assume
257 * support for any newer processor will mean a kernel that
258 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
259 * this fallback.
261 if (kvmppc_is_pr(cs->kvm_state)) {
262 /* No flags */
263 info->flags = 0;
264 info->slb_size = 64;
266 /* Standard 4k base page size segment */
267 info->sps[0].page_shift = 12;
268 info->sps[0].slb_enc = 0;
269 info->sps[0].enc[0].page_shift = 12;
270 info->sps[0].enc[0].pte_enc = 0;
272 /* Standard 16M large page size segment */
273 info->sps[1].page_shift = 24;
274 info->sps[1].slb_enc = SLB_VSID_L;
275 info->sps[1].enc[0].page_shift = 24;
276 info->sps[1].enc[0].pte_enc = 0;
277 } else {
278 int i = 0;
280 /* HV KVM has backing store size restrictions */
281 info->flags = KVM_PPC_PAGE_SIZES_REAL;
283 if (env->mmu_model & POWERPC_MMU_1TSEG) {
284 info->flags |= KVM_PPC_1T_SEGMENTS;
287 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
288 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
289 info->slb_size = 32;
290 } else {
291 info->slb_size = 64;
294 /* Standard 4k base page size segment */
295 info->sps[i].page_shift = 12;
296 info->sps[i].slb_enc = 0;
297 info->sps[i].enc[0].page_shift = 12;
298 info->sps[i].enc[0].pte_enc = 0;
299 i++;
301 /* 64K on MMU 2.06 and later */
302 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
303 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
304 info->sps[i].page_shift = 16;
305 info->sps[i].slb_enc = 0x110;
306 info->sps[i].enc[0].page_shift = 16;
307 info->sps[i].enc[0].pte_enc = 1;
308 i++;
311 /* Standard 16M large page size segment */
312 info->sps[i].page_shift = 24;
313 info->sps[i].slb_enc = SLB_VSID_L;
314 info->sps[i].enc[0].page_shift = 24;
315 info->sps[i].enc[0].pte_enc = 0;
319 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
321 CPUState *cs = CPU(cpu);
322 int ret;
324 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
325 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
326 if (ret == 0) {
327 return;
331 kvm_get_fallback_smmu_info(cpu, info);
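/* When KVM reports KVM_PPC_PAGE_SIZES_REAL, a segment/page size is only
 * usable if it is no larger than the page size backing guest RAM. */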
334 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
336 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
337 return true;
340 return (1ul << shift) <= rampgsize;
343 static long max_cpu_page_size;
345 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
347 static struct kvm_ppc_smmu_info smmu_info;
348 static bool has_smmu_info;
349 CPUPPCState *env = &cpu->env;
350 int iq, ik, jq, jk;
351 bool has_64k_pages = false;
353 /* We only handle page sizes for 64-bit server guests for now */
354 if (!(env->mmu_model & POWERPC_MMU_64)) {
355 return;
358 /* Collect MMU info from kernel if not already */
359 if (!has_smmu_info) {
360 kvm_get_smmu_info(cpu, &smmu_info);
361 has_smmu_info = true;
364 if (!max_cpu_page_size) {
365 max_cpu_page_size = qemu_getrampagesize();
368 /* Convert to QEMU form */
369 memset(&env->sps, 0, sizeof(env->sps));
371 /* If we have HV KVM, we need to forbid CI large pages if our
372 * host page size is smaller than 64K.
374 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
375 env->ci_large_pages = getpagesize() >= 0x10000;
379 * XXX This loop should be an entry wide AND of the capabilities that
380 * the selected CPU has with the capabilities that KVM supports.
382 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
383 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
384 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
386 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
387 ksps->page_shift)) {
388 continue;
390 qsps->page_shift = ksps->page_shift;
391 qsps->slb_enc = ksps->slb_enc;
392 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
393 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
394 ksps->enc[jk].page_shift)) {
395 continue;
397 if (ksps->enc[jk].page_shift == 16) {
398 has_64k_pages = true;
400 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
401 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
402 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
403 break;
406 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
407 break;
410 env->slb_nr = smmu_info.slb_size;
411 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
412 env->mmu_model &= ~POWERPC_MMU_1TSEG;
414 if (!has_64k_pages) {
415 env->mmu_model &= ~POWERPC_MMU_64K;
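/* Check that a memory backend's pages are at least as large as
 * max_cpu_page_size, which kvm_fixup_page_sizes() derives from the page
 * size backing main guest RAM. */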
419 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
421 Object *mem_obj = object_resolve_path(obj_path, NULL);
422 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
423 long pagesize;
425 if (mempath) {
426 pagesize = qemu_mempath_getpagesize(mempath);
427 } else {
428 pagesize = getpagesize();
431 return pagesize >= max_cpu_page_size;
434 #else /* defined (TARGET_PPC64) */
436 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
440 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
442 return true;
445 #endif /* !defined (TARGET_PPC64) */
447 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
449 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
452 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
453 * book3s supports only 1 watchpoint, so array size
454 * of 4 is sufficient for now.
456 #define MAX_HW_BKPTS 4
458 static struct HWBreakpoint {
459 target_ulong addr;
460 int type;
461 } hw_debug_points[MAX_HW_BKPTS];
463 static CPUWatchpoint hw_watchpoint;
465 /* Default there is no breakpoint and watchpoint supported */
466 static int max_hw_breakpoint;
467 static int max_hw_watchpoint;
468 static int nb_hw_breakpoint;
469 static int nb_hw_watchpoint;
471 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
473 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
474 max_hw_breakpoint = 2;
475 max_hw_watchpoint = 2;
478 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
479 fprintf(stderr, "Error initializing h/w breakpoints\n");
480 return;
484 int kvm_arch_init_vcpu(CPUState *cs)
486 PowerPCCPU *cpu = POWERPC_CPU(cs);
487 CPUPPCState *cenv = &cpu->env;
488 int ret;
490 /* Gather server mmu info from KVM and update the CPU state */
491 kvm_fixup_page_sizes(cpu);
493 /* Synchronize sregs with kvm */
494 ret = kvm_arch_sync_sregs(cpu);
495 if (ret) {
496 if (ret == -EINVAL) {
497 error_report("Register sync failed... If you're using kvm-hv.ko,"
498 " only \"-cpu host\" is possible");
500 return ret;
503 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
505 switch (cenv->mmu_model) {
506 case POWERPC_MMU_BOOKE206:
507 /* This target supports access to KVM's guest TLB */
508 ret = kvm_booke206_tlb_init(cpu);
509 break;
510 case POWERPC_MMU_2_07:
511 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
512 /* KVM-HV provides transactional memory on POWER8 even without the
513 * KVM_CAP_PPC_HTM extension, so enable it here instead as
514 * long as it's available to userspace on the host. */
515 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
516 cap_htm = true;
519 break;
520 default:
521 break;
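/* Ask KVM which instruction it uses as the software-breakpoint trap. */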
524 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
525 kvmppc_hw_debug_points_init(cenv);
527 return ret;
530 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
532 CPUPPCState *env = &cpu->env;
533 CPUState *cs = CPU(cpu);
534 struct kvm_dirty_tlb dirty_tlb;
535 unsigned char *bitmap;
536 int ret;
538 if (!env->kvm_sw_tlb) {
539 return;
542 bitmap = g_malloc((env->nb_tlb + 7) / 8);
543 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
545 dirty_tlb.bitmap = (uintptr_t)bitmap;
546 dirty_tlb.num_dirty = env->nb_tlb;
548 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
549 if (ret) {
550 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
551 __func__, strerror(-ret));
554 g_free(bitmap);
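/* Transfer a single SPR between env->spr[] and KVM via the ONE_REG
 * interface; the register width is encoded in the ONE_REG id. */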
557 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
559 PowerPCCPU *cpu = POWERPC_CPU(cs);
560 CPUPPCState *env = &cpu->env;
561 union {
562 uint32_t u32;
563 uint64_t u64;
564 } val;
565 struct kvm_one_reg reg = {
566 .id = id,
567 .addr = (uintptr_t) &val,
569 int ret;
571 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
572 if (ret != 0) {
573 trace_kvm_failed_spr_get(spr, strerror(errno));
574 } else {
575 switch (id & KVM_REG_SIZE_MASK) {
576 case KVM_REG_SIZE_U32:
577 env->spr[spr] = val.u32;
578 break;
580 case KVM_REG_SIZE_U64:
581 env->spr[spr] = val.u64;
582 break;
584 default:
585 /* Don't handle this size yet */
586 abort();
591 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
593 PowerPCCPU *cpu = POWERPC_CPU(cs);
594 CPUPPCState *env = &cpu->env;
595 union {
596 uint32_t u32;
597 uint64_t u64;
598 } val;
599 struct kvm_one_reg reg = {
600 .id = id,
601 .addr = (uintptr_t) &val,
603 int ret;
605 switch (id & KVM_REG_SIZE_MASK) {
606 case KVM_REG_SIZE_U32:
607 val.u32 = env->spr[spr];
608 break;
610 case KVM_REG_SIZE_U64:
611 val.u64 = env->spr[spr];
612 break;
614 default:
615 /* Don't handle this size yet */
616 abort();
619 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
620 if (ret != 0) {
621 trace_kvm_failed_spr_set(spr, strerror(errno));
625 static int kvm_put_fp(CPUState *cs)
627 PowerPCCPU *cpu = POWERPC_CPU(cs);
628 CPUPPCState *env = &cpu->env;
629 struct kvm_one_reg reg;
630 int i;
631 int ret;
633 if (env->insns_flags & PPC_FLOAT) {
634 uint64_t fpscr = env->fpscr;
635 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
637 reg.id = KVM_REG_PPC_FPSCR;
638 reg.addr = (uintptr_t)&fpscr;
639 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
640 if (ret < 0) {
641 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
642 return ret;
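/* Each VSX register is transferred as two 64-bit halves; the FPR lives in
 * the first doubleword on big-endian hosts and in the second on
 * little-endian hosts. */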
645 for (i = 0; i < 32; i++) {
646 uint64_t vsr[2];
648 #ifdef HOST_WORDS_BIGENDIAN
649 vsr[0] = float64_val(env->fpr[i]);
650 vsr[1] = env->vsr[i];
651 #else
652 vsr[0] = env->vsr[i];
653 vsr[1] = float64_val(env->fpr[i]);
654 #endif
655 reg.addr = (uintptr_t) &vsr;
656 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
658 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
659 if (ret < 0) {
660 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
661 i, strerror(errno));
662 return ret;
667 if (env->insns_flags & PPC_ALTIVEC) {
668 reg.id = KVM_REG_PPC_VSCR;
669 reg.addr = (uintptr_t)&env->vscr;
670 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
671 if (ret < 0) {
672 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
673 return ret;
676 for (i = 0; i < 32; i++) {
677 reg.id = KVM_REG_PPC_VR(i);
678 reg.addr = (uintptr_t)&env->avr[i];
679 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
680 if (ret < 0) {
681 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
682 return ret;
687 return 0;
690 static int kvm_get_fp(CPUState *cs)
692 PowerPCCPU *cpu = POWERPC_CPU(cs);
693 CPUPPCState *env = &cpu->env;
694 struct kvm_one_reg reg;
695 int i;
696 int ret;
698 if (env->insns_flags & PPC_FLOAT) {
699 uint64_t fpscr;
700 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
702 reg.id = KVM_REG_PPC_FPSCR;
703 reg.addr = (uintptr_t)&fpscr;
704 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
705 if (ret < 0) {
706 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
707 return ret;
708 } else {
709 env->fpscr = fpscr;
712 for (i = 0; i < 32; i++) {
713 uint64_t vsr[2];
715 reg.addr = (uintptr_t) &vsr;
716 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
718 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
719 if (ret < 0) {
720 DPRINTF("Unable to get %s%d from KVM: %s\n",
721 vsx ? "VSR" : "FPR", i, strerror(errno));
722 return ret;
723 } else {
724 #ifdef HOST_WORDS_BIGENDIAN
725 env->fpr[i] = vsr[0];
726 if (vsx) {
727 env->vsr[i] = vsr[1];
729 #else
730 env->fpr[i] = vsr[1];
731 if (vsx) {
732 env->vsr[i] = vsr[0];
734 #endif
739 if (env->insns_flags & PPC_ALTIVEC) {
740 reg.id = KVM_REG_PPC_VSCR;
741 reg.addr = (uintptr_t)&env->vscr;
742 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
743 if (ret < 0) {
744 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
745 return ret;
748 for (i = 0; i < 32; i++) {
749 reg.id = KVM_REG_PPC_VR(i);
750 reg.addr = (uintptr_t)&env->avr[i];
751 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
752 if (ret < 0) {
753 DPRINTF("Unable to get VR%d from KVM: %s\n",
754 i, strerror(errno));
755 return ret;
760 return 0;
763 #if defined(TARGET_PPC64)
764 static int kvm_get_vpa(CPUState *cs)
766 PowerPCCPU *cpu = POWERPC_CPU(cs);
767 CPUPPCState *env = &cpu->env;
768 struct kvm_one_reg reg;
769 int ret;
771 reg.id = KVM_REG_PPC_VPA_ADDR;
772 reg.addr = (uintptr_t)&env->vpa_addr;
773 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
774 if (ret < 0) {
775 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
776 return ret;
779 assert((uintptr_t)&env->slb_shadow_size
780 == ((uintptr_t)&env->slb_shadow_addr + 8));
781 reg.id = KVM_REG_PPC_VPA_SLB;
782 reg.addr = (uintptr_t)&env->slb_shadow_addr;
783 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784 if (ret < 0) {
785 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
786 strerror(errno));
787 return ret;
790 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
791 reg.id = KVM_REG_PPC_VPA_DTL;
792 reg.addr = (uintptr_t)&env->dtl_addr;
793 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
794 if (ret < 0) {
795 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
796 strerror(errno));
797 return ret;
800 return 0;
803 static int kvm_put_vpa(CPUState *cs)
805 PowerPCCPU *cpu = POWERPC_CPU(cs);
806 CPUPPCState *env = &cpu->env;
807 struct kvm_one_reg reg;
808 int ret;
810 /* SLB shadow or DTL can't be registered unless a master VPA is
811 * registered. That means when restoring state, if a VPA *is*
812 * registered, we need to set that up first. If not, we need to
813 * deregister the others before deregistering the master VPA */
814 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
816 if (env->vpa_addr) {
817 reg.id = KVM_REG_PPC_VPA_ADDR;
818 reg.addr = (uintptr_t)&env->vpa_addr;
819 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
820 if (ret < 0) {
821 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
822 return ret;
826 assert((uintptr_t)&env->slb_shadow_size
827 == ((uintptr_t)&env->slb_shadow_addr + 8));
828 reg.id = KVM_REG_PPC_VPA_SLB;
829 reg.addr = (uintptr_t)&env->slb_shadow_addr;
830 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
831 if (ret < 0) {
832 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
833 return ret;
836 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
837 reg.id = KVM_REG_PPC_VPA_DTL;
838 reg.addr = (uintptr_t)&env->dtl_addr;
839 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
840 if (ret < 0) {
841 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
842 strerror(errno));
843 return ret;
846 if (!env->vpa_addr) {
847 reg.id = KVM_REG_PPC_VPA_ADDR;
848 reg.addr = (uintptr_t)&env->vpa_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
850 if (ret < 0) {
851 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
852 return ret;
856 return 0;
858 #endif /* TARGET_PPC64 */
860 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
862 CPUPPCState *env = &cpu->env;
863 struct kvm_sregs sregs;
864 int i;
866 sregs.pvr = env->spr[SPR_PVR];
868 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
870 /* Sync SLB */
871 #ifdef TARGET_PPC64
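/* KVM expects the SLB slot index in the low bits of each valid ESID image
 * (kvmppc_get_books_sregs masks it back out on the way in). */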
872 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
873 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
874 if (env->slb[i].esid & SLB_ESID_V) {
875 sregs.u.s.ppc64.slb[i].slbe |= i;
877 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
879 #endif
881 /* Sync SRs */
882 for (i = 0; i < 16; i++) {
883 sregs.u.s.ppc32.sr[i] = env->sr[i];
886 /* Sync BATs */
887 for (i = 0; i < 8; i++) {
888 /* Beware. We have to swap upper and lower bits here */
889 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
890 | env->DBAT[1][i];
891 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
892 | env->IBAT[1][i];
895 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
898 int kvm_arch_put_registers(CPUState *cs, int level)
900 PowerPCCPU *cpu = POWERPC_CPU(cs);
901 CPUPPCState *env = &cpu->env;
902 struct kvm_regs regs;
903 int ret;
904 int i;
906 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
907 if (ret < 0) {
908 return ret;
911 regs.ctr = env->ctr;
912 regs.lr = env->lr;
913 regs.xer = cpu_read_xer(env);
914 regs.msr = env->msr;
915 regs.pc = env->nip;
917 regs.srr0 = env->spr[SPR_SRR0];
918 regs.srr1 = env->spr[SPR_SRR1];
920 regs.sprg0 = env->spr[SPR_SPRG0];
921 regs.sprg1 = env->spr[SPR_SPRG1];
922 regs.sprg2 = env->spr[SPR_SPRG2];
923 regs.sprg3 = env->spr[SPR_SPRG3];
924 regs.sprg4 = env->spr[SPR_SPRG4];
925 regs.sprg5 = env->spr[SPR_SPRG5];
926 regs.sprg6 = env->spr[SPR_SPRG6];
927 regs.sprg7 = env->spr[SPR_SPRG7];
929 regs.pid = env->spr[SPR_BOOKE_PID];
931 for (i = 0;i < 32; i++)
932 regs.gpr[i] = env->gpr[i];
934 regs.cr = 0;
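/* Pack the eight 4-bit CR fields into the single 32-bit register KVM
 * expects, with CR0 in the most significant nibble. */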
935 for (i = 0; i < 8; i++) {
936 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
939 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
940 if (ret < 0)
941 return ret;
943 kvm_put_fp(cs);
945 if (env->tlb_dirty) {
946 kvm_sw_tlb_put(cpu);
947 env->tlb_dirty = false;
950 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
951 ret = kvmppc_put_books_sregs(cpu);
952 if (ret < 0) {
953 return ret;
957 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
958 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
961 if (cap_one_reg) {
962 int i;
964 /* We deliberately ignore errors here, for kernels which have
965 * the ONE_REG calls, but don't support the specific
966 * registers, there's a reasonable chance things will still
967 * work, at least until we try to migrate. */
968 for (i = 0; i < 1024; i++) {
969 uint64_t id = env->spr_cb[i].one_reg_id;
971 if (id != 0) {
972 kvm_put_one_spr(cs, id, i);
976 #ifdef TARGET_PPC64
977 if (msr_ts) {
978 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
979 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
981 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
982 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
984 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
986 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
988 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
991 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
992 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
993 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
996 if (cap_papr) {
997 if (kvm_put_vpa(cs) < 0) {
998 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1002 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1003 #endif /* TARGET_PPC64 */
1006 return ret;
1009 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1011 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1014 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1016 CPUPPCState *env = &cpu->env;
1017 struct kvm_sregs sregs;
1018 int ret;
1020 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1021 if (ret < 0) {
1022 return ret;
1025 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1026 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1027 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1028 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1029 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1030 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1031 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1032 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1033 env->spr[SPR_DECR] = sregs.u.e.dec;
1034 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1035 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1036 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1039 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1040 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1041 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1042 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1043 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1044 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1047 if (sregs.u.e.features & KVM_SREGS_E_64) {
1048 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1051 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1052 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1055 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1056 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1057 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1058 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1059 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1060 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1061 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1062 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1063 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1064 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1065 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1066 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1067 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1068 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1069 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1070 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1071 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1072 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1073 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1074 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1075 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1076 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1077 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1078 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1079 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1080 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1081 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1082 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1083 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1084 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1085 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1086 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1087 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1089 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1090 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1091 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1092 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1093 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1094 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1095 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1098 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1099 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1100 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1103 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1104 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1105 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1106 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1107 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1111 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1112 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1113 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1114 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1115 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1116 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1117 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1118 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1119 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1120 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1121 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1124 if (sregs.u.e.features & KVM_SREGS_EXP) {
1125 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1128 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1129 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1130 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1133 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1134 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1135 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1136 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1138 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1139 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1140 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1144 return 0;
1147 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1149 CPUPPCState *env = &cpu->env;
1150 struct kvm_sregs sregs;
1151 int ret;
1152 int i;
1154 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1155 if (ret < 0) {
1156 return ret;
1159 if (!cpu->vhyp) {
1160 ppc_store_sdr1(env, sregs.u.s.sdr1);
1163 /* Sync SLB */
1164 #ifdef TARGET_PPC64
1166 * The packed SLB array we get from KVM_GET_SREGS only contains
1167 * information about valid entries. So we flush our internal copy
1168 * to get rid of stale ones, then put all valid SLB entries back
1169 * in.
1171 memset(env->slb, 0, sizeof(env->slb));
1172 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1173 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1174 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1176 * Only restore valid entries
1178 if (rb & SLB_ESID_V) {
1179 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1182 #endif
1184 /* Sync SRs */
1185 for (i = 0; i < 16; i++) {
1186 env->sr[i] = sregs.u.s.ppc32.sr[i];
1189 /* Sync BATs */
1190 for (i = 0; i < 8; i++) {
1191 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1192 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1193 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1194 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1197 return 0;
1200 int kvm_arch_get_registers(CPUState *cs)
1202 PowerPCCPU *cpu = POWERPC_CPU(cs);
1203 CPUPPCState *env = &cpu->env;
1204 struct kvm_regs regs;
1205 uint32_t cr;
1206 int i, ret;
1208 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1209 if (ret < 0)
1210 return ret;
1212 cr = regs.cr;
1213 for (i = 7; i >= 0; i--) {
1214 env->crf[i] = cr & 15;
1215 cr >>= 4;
1218 env->ctr = regs.ctr;
1219 env->lr = regs.lr;
1220 cpu_write_xer(env, regs.xer);
1221 env->msr = regs.msr;
1222 env->nip = regs.pc;
1224 env->spr[SPR_SRR0] = regs.srr0;
1225 env->spr[SPR_SRR1] = regs.srr1;
1227 env->spr[SPR_SPRG0] = regs.sprg0;
1228 env->spr[SPR_SPRG1] = regs.sprg1;
1229 env->spr[SPR_SPRG2] = regs.sprg2;
1230 env->spr[SPR_SPRG3] = regs.sprg3;
1231 env->spr[SPR_SPRG4] = regs.sprg4;
1232 env->spr[SPR_SPRG5] = regs.sprg5;
1233 env->spr[SPR_SPRG6] = regs.sprg6;
1234 env->spr[SPR_SPRG7] = regs.sprg7;
1236 env->spr[SPR_BOOKE_PID] = regs.pid;
1238 for (i = 0;i < 32; i++)
1239 env->gpr[i] = regs.gpr[i];
1241 kvm_get_fp(cs);
1243 if (cap_booke_sregs) {
1244 ret = kvmppc_get_booke_sregs(cpu);
1245 if (ret < 0) {
1246 return ret;
1250 if (cap_segstate) {
1251 ret = kvmppc_get_books_sregs(cpu);
1252 if (ret < 0) {
1253 return ret;
1257 if (cap_hior) {
1258 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1261 if (cap_one_reg) {
1262 int i;
1264 /* We deliberately ignore errors here, for kernels which have
1265 * the ONE_REG calls, but don't support the specific
1266 * registers, there's a reasonable chance things will still
1267 * work, at least until we try to migrate. */
1268 for (i = 0; i < 1024; i++) {
1269 uint64_t id = env->spr_cb[i].one_reg_id;
1271 if (id != 0) {
1272 kvm_get_one_spr(cs, id, i);
1276 #ifdef TARGET_PPC64
1277 if (msr_ts) {
1278 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1279 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1281 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1282 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1284 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1286 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1287 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1288 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1289 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1291 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1292 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1293 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1296 if (cap_papr) {
1297 if (kvm_get_vpa(cs) < 0) {
1298 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1302 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1303 #endif
1306 return 0;
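/* Raise or lower the external interrupt line for a vCPU. Only
 * PPC_INTERRUPT_EXT is forwarded to KVM, and only when the kernel
 * supports the unset/level interrupt capabilities. */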
1309 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1311 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1313 if (irq != PPC_INTERRUPT_EXT) {
1314 return 0;
1317 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1318 return 0;
1321 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1323 return 0;
1326 #if defined(TARGET_PPCEMB)
1327 #define PPC_INPUT_INT PPC40x_INPUT_INT
1328 #elif defined(TARGET_PPC64)
1329 #define PPC_INPUT_INT PPC970_INPUT_INT
1330 #else
1331 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1332 #endif
1334 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1336 PowerPCCPU *cpu = POWERPC_CPU(cs);
1337 CPUPPCState *env = &cpu->env;
1338 int r;
1339 unsigned irq;
1341 qemu_mutex_lock_iothread();
1343 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1344 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1345 if (!cap_interrupt_level &&
1346 run->ready_for_interrupt_injection &&
1347 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1348 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1350 /* For now KVM disregards the 'irq' argument. However, in the
1351 * future KVM could cache it in-kernel to avoid a heavyweight exit
1352 * when reading the UIC.
1354 irq = KVM_INTERRUPT_SET;
1356 DPRINTF("injected interrupt %d\n", irq);
1357 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1358 if (r < 0) {
1359 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1362 /* Always wake up soon in case the interrupt was level based */
1363 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1364 (NANOSECONDS_PER_SECOND / 50));
1367 /* We don't know if there are more interrupts pending after this. However,
1368 * the guest will return to userspace in the course of handling this one
1369 * anyways, so we will get a chance to deliver the rest. */
1371 qemu_mutex_unlock_iothread();
1374 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1376 return MEMTXATTRS_UNSPECIFIED;
1379 int kvm_arch_process_async_events(CPUState *cs)
1381 return cs->halted;
1384 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1386 CPUState *cs = CPU(cpu);
1387 CPUPPCState *env = &cpu->env;
1389 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1390 cs->halted = 1;
1391 cs->exception_index = EXCP_HLT;
1394 return 0;
1397 /* map dcr access to existing qemu dcr emulation */
1398 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1400 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1401 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1403 return 0;
1406 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1408 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1409 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1411 return 0;
1414 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1416 /* Mixed endian case is not handled */
1417 uint32_t sc = debug_inst_opcode;
1419 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1420 sizeof(sc), 0) ||
1421 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1422 return -EINVAL;
1425 return 0;
1428 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1430 uint32_t sc;
1432 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1433 sc != debug_inst_opcode ||
1434 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1435 sizeof(sc), 1)) {
1436 return -EINVAL;
1439 return 0;
1442 static int find_hw_breakpoint(target_ulong addr, int type)
1444 int n;
1446 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1447 <= ARRAY_SIZE(hw_debug_points));
1449 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1450 if (hw_debug_points[n].addr == addr &&
1451 hw_debug_points[n].type == type) {
1452 return n;
1456 return -1;
1459 static int find_hw_watchpoint(target_ulong addr, int *flag)
1461 int n;
1463 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1464 if (n >= 0) {
1465 *flag = BP_MEM_ACCESS;
1466 return n;
1469 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1470 if (n >= 0) {
1471 *flag = BP_MEM_WRITE;
1472 return n;
1475 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1476 if (n >= 0) {
1477 *flag = BP_MEM_READ;
1478 return n;
1481 return -1;
1484 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1485 target_ulong len, int type)
1487 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1488 return -ENOBUFS;
1491 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1492 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1494 switch (type) {
1495 case GDB_BREAKPOINT_HW:
1496 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1497 return -ENOBUFS;
1500 if (find_hw_breakpoint(addr, type) >= 0) {
1501 return -EEXIST;
1504 nb_hw_breakpoint++;
1505 break;
1507 case GDB_WATCHPOINT_WRITE:
1508 case GDB_WATCHPOINT_READ:
1509 case GDB_WATCHPOINT_ACCESS:
1510 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1511 return -ENOBUFS;
1514 if (find_hw_breakpoint(addr, type) >= 0) {
1515 return -EEXIST;
1518 nb_hw_watchpoint++;
1519 break;
1521 default:
1522 return -ENOSYS;
1525 return 0;
1528 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1529 target_ulong len, int type)
1531 int n;
1533 n = find_hw_breakpoint(addr, type);
1534 if (n < 0) {
1535 return -ENOENT;
1538 switch (type) {
1539 case GDB_BREAKPOINT_HW:
1540 nb_hw_breakpoint--;
1541 break;
1543 case GDB_WATCHPOINT_WRITE:
1544 case GDB_WATCHPOINT_READ:
1545 case GDB_WATCHPOINT_ACCESS:
1546 nb_hw_watchpoint--;
1547 break;
1549 default:
1550 return -ENOSYS;
1552 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1554 return 0;
1557 void kvm_arch_remove_all_hw_breakpoints(void)
1559 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1562 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1564 int n;
1566 /* Software Breakpoint updates */
1567 if (kvm_sw_breakpoints_active(cs)) {
1568 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1571 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1572 <= ARRAY_SIZE(hw_debug_points));
1573 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1575 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1576 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1577 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1578 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1579 switch (hw_debug_points[n].type) {
1580 case GDB_BREAKPOINT_HW:
1581 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1582 break;
1583 case GDB_WATCHPOINT_WRITE:
1584 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1585 break;
1586 case GDB_WATCHPOINT_READ:
1587 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1588 break;
1589 case GDB_WATCHPOINT_ACCESS:
1590 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1591 KVMPPC_DEBUG_WATCH_READ;
1592 break;
1593 default:
1594 cpu_abort(cs, "Unsupported breakpoint type\n");
1596 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1601 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1603 CPUState *cs = CPU(cpu);
1604 CPUPPCState *env = &cpu->env;
1605 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1606 int handle = 0;
1607 int n;
1608 int flag = 0;
1610 if (cs->singlestep_enabled) {
1611 handle = 1;
1612 } else if (arch_info->status) {
1613 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1614 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1615 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1616 if (n >= 0) {
1617 handle = 1;
1619 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1620 KVMPPC_DEBUG_WATCH_WRITE)) {
1621 n = find_hw_watchpoint(arch_info->address, &flag);
1622 if (n >= 0) {
1623 handle = 1;
1624 cs->watchpoint_hit = &hw_watchpoint;
1625 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1626 hw_watchpoint.flags = flag;
1630 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1631 handle = 1;
1632 } else {
1633 /* QEMU is not able to handle debug exception, so inject
1634 * program exception to guest;
1635 * Yes program exception NOT debug exception !!
1636 * When QEMU is using debug resources then debug exception must
1637 * be always set. To achieve this we set MSR_DE and also set
1638 * MSRP_DEP so guest cannot change MSR_DE.
1639 * When emulating debug resource for guest we want guest
1640 * to control MSR_DE (enable/disable debug interrupt on need).
1641 * Supporting both configurations is NOT possible.
1642 * So the result is that we cannot share debug resources
1643 * between QEMU and Guest on BOOKE architecture.
1644 * In the current design QEMU gets the priority over guest,
1645 * this means that if QEMU is using debug resources then guest
1646 * cannot use them;
1647 * For software breakpoint QEMU uses a privileged instruction;
1648 * So there cannot be any reason that we are here for a guest-set
1649 * debug exception; the only possibility is that the guest executed
1650 * a privileged / illegal instruction, and that's why we are
1651 * injecting a program interrupt.
1654 cpu_synchronize_state(cs);
1655 /* env->nip is PC, so increment this by 4 to use
1656 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1658 env->nip += 4;
1659 cs->exception_index = POWERPC_EXCP_PROGRAM;
1660 env->error_code = POWERPC_EXCP_INVAL;
1661 ppc_cpu_do_interrupt(cs);
1664 return handle;
1667 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1669 PowerPCCPU *cpu = POWERPC_CPU(cs);
1670 CPUPPCState *env = &cpu->env;
1671 int ret;
1673 qemu_mutex_lock_iothread();
1675 switch (run->exit_reason) {
1676 case KVM_EXIT_DCR:
1677 if (run->dcr.is_write) {
1678 DPRINTF("handle dcr write\n");
1679 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1680 } else {
1681 DPRINTF("handle dcr read\n");
1682 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1684 break;
1685 case KVM_EXIT_HLT:
1686 DPRINTF("handle halt\n");
1687 ret = kvmppc_handle_halt(cpu);
1688 break;
1689 #if defined(TARGET_PPC64)
1690 case KVM_EXIT_PAPR_HCALL:
1691 DPRINTF("handle PAPR hypercall\n");
1692 run->papr_hcall.ret = spapr_hypercall(cpu,
1693 run->papr_hcall.nr,
1694 run->papr_hcall.args);
1695 ret = 0;
1696 break;
1697 #endif
1698 case KVM_EXIT_EPR:
1699 DPRINTF("handle epr\n");
1700 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1701 ret = 0;
1702 break;
1703 case KVM_EXIT_WATCHDOG:
1704 DPRINTF("handle watchdog expiry\n");
1705 watchdog_perform_action();
1706 ret = 0;
1707 break;
1709 case KVM_EXIT_DEBUG:
1710 DPRINTF("handle debug exception\n");
1711 if (kvm_handle_debug(cpu, run)) {
1712 ret = EXCP_DEBUG;
1713 break;
1715 /* re-enter, this exception was guest-internal */
1716 ret = 0;
1717 break;
1719 default:
1720 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1721 ret = -1;
1722 break;
1725 qemu_mutex_unlock_iothread();
1726 return ret;
1729 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1731 CPUState *cs = CPU(cpu);
1732 uint32_t bits = tsr_bits;
1733 struct kvm_one_reg reg = {
1734 .id = KVM_REG_PPC_OR_TSR,
1735 .addr = (uintptr_t) &bits,
1738 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1741 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1744 CPUState *cs = CPU(cpu);
1745 uint32_t bits = tsr_bits;
1746 struct kvm_one_reg reg = {
1747 .id = KVM_REG_PPC_CLEAR_TSR,
1748 .addr = (uintptr_t) &bits,
1751 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1754 int kvmppc_set_tcr(PowerPCCPU *cpu)
1756 CPUState *cs = CPU(cpu);
1757 CPUPPCState *env = &cpu->env;
1758 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1760 struct kvm_one_reg reg = {
1761 .id = KVM_REG_PPC_TCR,
1762 .addr = (uintptr_t) &tcr,
1765 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1768 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1770 CPUState *cs = CPU(cpu);
1771 int ret;
1773 if (!kvm_enabled()) {
1774 return -1;
1777 if (!cap_ppc_watchdog) {
1778 printf("warning: KVM does not support watchdog");
1779 return -1;
1782 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1783 if (ret < 0) {
1784 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1785 __func__, strerror(-ret));
1786 return ret;
1789 return ret;
1792 static int read_cpuinfo(const char *field, char *value, int len)
1794 FILE *f;
1795 int ret = -1;
1796 int field_len = strlen(field);
1797 char line[512];
1799 f = fopen("/proc/cpuinfo", "r");
1800 if (!f) {
1801 return -1;
1804 do {
1805 if (!fgets(line, sizeof(line), f)) {
1806 break;
1808 if (!strncmp(line, field, field_len)) {
1809 pstrcpy(value, len, line);
1810 ret = 0;
1811 break;
1813 } while(*line);
1815 fclose(f);
1817 return ret;
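/* Parse the timebase frequency from the "timebase" line of /proc/cpuinfo;
 * fall back to 1 GHz if it cannot be determined. */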
1820 uint32_t kvmppc_get_tbfreq(void)
1822 char line[512];
1823 char *ns;
1824 uint32_t retval = NANOSECONDS_PER_SECOND;
1826 if (read_cpuinfo("timebase", line, sizeof(line))) {
1827 return retval;
1830 if (!(ns = strchr(line, ':'))) {
1831 return retval;
1834 ns++;
1836 return atoi(ns);
1839 bool kvmppc_get_host_serial(char **value)
1841 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1842 NULL);
1845 bool kvmppc_get_host_model(char **value)
1847 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1850 /* Try to find a device tree node for a CPU with clock-frequency property */
1851 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1853 struct dirent *dirp;
1854 DIR *dp;
1856 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1857 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1858 return -1;
1861 buf[0] = '\0';
1862 while ((dirp = readdir(dp)) != NULL) {
1863 FILE *f;
1864 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1865 dirp->d_name);
1866 f = fopen(buf, "r");
1867 if (f) {
1868 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1869 fclose(f);
1870 break;
1872 buf[0] = '\0';
1874 closedir(dp);
1875 if (buf[0] == '\0') {
1876 printf("Unknown host!\n");
1877 return -1;
1880 return 0;
1883 static uint64_t kvmppc_read_int_dt(const char *filename)
1885 union {
1886 uint32_t v32;
1887 uint64_t v64;
1888 } u;
1889 FILE *f;
1890 int len;
1892 f = fopen(filename, "rb");
1893 if (!f) {
1894 return -1;
1897 len = fread(&u, 1, sizeof(u), f);
1898 fclose(f);
1899 switch (len) {
1900 case 4:
1901 /* property is a 32-bit quantity */
1902 return be32_to_cpu(u.v32);
1903 case 8:
1904 return be64_to_cpu(u.v64);
1907 return 0;
1910 /* Read a CPU node property from the host device tree that's a single
1911 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1912 * (can't find or open the property, or doesn't understand the
1913 * format) */
1914 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1916 char buf[PATH_MAX], *tmp;
1917 uint64_t val;
1919 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1920 return -1;
1923 tmp = g_strdup_printf("%s/%s", buf, propname);
1924 val = kvmppc_read_int_dt(tmp);
1925 g_free(tmp);
1927 return val;
1930 uint64_t kvmppc_get_clockfreq(void)
1932 return kvmppc_read_int_cpu_dt("clock-frequency");
1935 uint32_t kvmppc_get_vmx(void)
1937 return kvmppc_read_int_cpu_dt("ibm,vmx");
1940 uint32_t kvmppc_get_dfp(void)
1942 return kvmppc_read_int_cpu_dt("ibm,dfp");
1945 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1947 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1948 CPUState *cs = CPU(cpu);
1950 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1951 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1952 return 0;
1955 return 1;
1958 int kvmppc_get_hasidle(CPUPPCState *env)
1960 struct kvm_ppc_pvinfo pvinfo;
1962 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1963 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1964 return 1;
1967 return 0;
1970 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1972 uint32_t *hc = (uint32_t*)buf;
1973 struct kvm_ppc_pvinfo pvinfo;
1975 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1976 memcpy(buf, pvinfo.hcall, buf_len);
1977 return 0;
1981 * Fallback to always fail hypercalls regardless of endianness:
1983 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1984 * li r3, -1
1985 * b .+8 (becomes nop in wrong endian)
1986 * bswap32(li r3, -1)
1989 hc[0] = cpu_to_be32(0x08000048);
1990 hc[1] = cpu_to_be32(0x3860ffff);
1991 hc[2] = cpu_to_be32(0x48000008);
1992 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1994 return 1;
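/* Enable in-kernel handling of a single sPAPR hypercall via
 * KVM_CAP_PPC_ENABLE_HCALL. */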
1997 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1999 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2002 void kvmppc_enable_logical_ci_hcalls(void)
2005 * FIXME: it would be nice if we could detect the cases where
2006 * we're using a device which requires the in kernel
2007 * implementation of these hcalls, but the kernel lacks them and
2008 * produce a warning.
2010 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2011 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2014 void kvmppc_enable_set_mode_hcall(void)
2016 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2019 void kvmppc_enable_clear_ref_mod_hcalls(void)
2021 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2022 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2025 void kvmppc_set_papr(PowerPCCPU *cpu)
2027 CPUState *cs = CPU(cpu);
2028 int ret;
2030 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2031 if (ret) {
2032 error_report("This vCPU type or KVM version does not support PAPR");
2033 exit(1);
2036 /* Update the capability flag so we sync the right information
2037 * with kvm */
2038 cap_papr = 1;
2041 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2043 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2046 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2048 CPUState *cs = CPU(cpu);
2049 int ret;
2051 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2052 if (ret && mpic_proxy) {
2053 error_report("This KVM version does not support EPR");
2054 exit(1);
2058 int kvmppc_smt_threads(void)
2060 return cap_ppc_smt ? cap_ppc_smt : 1;
2063 #ifdef TARGET_PPC64
2064 off_t kvmppc_alloc_rma(void **rma)
2066 off_t size;
2067 int fd;
2068 struct kvm_allocate_rma ret;
2070 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2071 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2072 * not necessary on this hardware
2073 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2075 * FIXME: We should allow the user to force contiguous RMA
2076 * allocation in the cap_ppc_rma==1 case.
2078 if (cap_ppc_rma < 2) {
2079 return 0;
2082 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2083 if (fd < 0) {
2084 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2085 strerror(errno));
2086 return -1;
2089 size = MIN(ret.rma_size, 256ul << 20);
2091 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2092 if (*rma == MAP_FAILED) {
2093 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2094 return -1;
2097 return size;
2100 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2102 struct kvm_ppc_smmu_info info;
2103 long rampagesize, best_page_shift;
2104 int i;
2106 if (cap_ppc_rma >= 2) {
2107 return current_size;
2110 /* Find the largest hardware supported page size that's less than
2111 * or equal to the (logical) backing page size of guest RAM */
2112 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2113 rampagesize = qemu_getrampagesize();
2114 best_page_shift = 0;
2116 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2117 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2119 if (!sps->page_shift) {
2120 continue;
2123 if ((sps->page_shift > best_page_shift)
2124 && ((1UL << sps->page_shift) <= rampagesize)) {
2125 best_page_shift = sps->page_shift;
2129 return MIN(current_size,
2130 1ULL << (best_page_shift + hash_shift - 7));
2132 #endif
2134 bool kvmppc_spapr_use_multitce(void)
2136 return cap_spapr_multitce;
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
                              bool need_vfio)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

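/* Allocate or reset the guest hash page table in the kernel.  Returns the
 * log2 size (shift) of a kernel-managed HPT, 0 if QEMU must allocate the
 * HPT itself, or a negative error code */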
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;

        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

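/* Fix up the "host" CPU class with whatever we can query from the host:
 * the real PVR, Altivec/VSX and DFP availability, and the L1 cache sizes */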
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}

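/* Register the "host" CPU type (and, for pseries, the matching spapr core
 * type) derived from the host PVR, and point the generic family alias
 * (e.g. "POWER8") at it */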
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    PowerPCCPUClass *pvr_pcc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

#if defined(TARGET_PPC64)
    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
    type_info.parent = TYPE_SPAPR_CPU_CORE;
    type_info.instance_size = sizeof(sPAPRCPUCore);
    type_info.instance_init = NULL;
    type_info.class_init = spapr_cpu_core_class_init;
    type_info.class_data = (void *) "host";
    type_register(&type_info);
    g_free((void *)type_info.name);
#endif

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
            if (suffix) {
                *suffix = 0;
            }
            ppc_cpu_aliases[i].oc = oc;
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

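/* Get a file descriptor from KVM for reading or writing the guest hash
 * page table (used for HPT migration); returns -1 if the kernel lacks
 * the capability */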
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}

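/* Read HPT chunks from the KVM fd and emit them to the migration stream as
 * (index, n_valid, n_invalid, valid PTEs) records.  Returns 1 once the whole
 * table has been sent, or 0 when it stops early because roughly max_ns
 * nanoseconds have elapsed (max_ns < 0 means no time limit) */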
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;

            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

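/* Write one chunk of HPT entries received from the migration stream back
 * into the kernel's hash table via the KVM fd */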
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

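/* Read n HPTEs starting at ptex from the kernel's hash table into hptes[],
 * zeroing any entries the kernel reports as invalid */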
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    struct kvm_get_htab_fd ghf = {
        .flags = 0,
        .start_index = ptex,
    };
    int fd, rc;
    int i;

    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_read_hptes: Unable to open HPT fd");
    }

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
            }

            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
            i += hdr->n_valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += hdr->n_invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

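/* Rewrite a single HPTE in the kernel's hash table */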
void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct kvm_get_htab_fd ghf;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_write_hpte: Unable to open HPT fd");
    }

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }

    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

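/* Enable the H_RANDOM hypercall, backed by the host hardware RNG, if this
 * KVM supports it; returns -1 when unavailable */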
int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}