MAINTAINERS: update Allwinner A10 maintainer
[qemu/ar7.git] / target-ppc / kvm.c
blobe641680fb1461a19b50b76f72763cf2698db4ee9
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
41 #include "exec/gdbstub.h"
42 #include "exec/memattrs.h"
43 #include "sysemu/hostmem.h"
45 //#define DEBUG_KVM
47 #ifdef DEBUG_KVM
48 #define DPRINTF(fmt, ...) \
49 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
50 #else
51 #define DPRINTF(fmt, ...) \
52 do { } while (0)
53 #endif
55 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
57 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
58 KVM_CAP_LAST_INFO
61 static int cap_interrupt_unset = false;
62 static int cap_interrupt_level = false;
63 static int cap_segstate;
64 static int cap_booke_sregs;
65 static int cap_ppc_smt;
66 static int cap_ppc_rma;
67 static int cap_spapr_tce;
68 static int cap_spapr_multitce;
69 static int cap_spapr_vfio;
70 static int cap_hior;
71 static int cap_one_reg;
72 static int cap_epr;
73 static int cap_ppc_watchdog;
74 static int cap_papr;
75 static int cap_htab_fd;
76 static int cap_fixup_hcalls;
78 static uint32_t debug_inst_opcode;
80 /* XXX We have a race condition where we actually have a level triggered
81 * interrupt, but the infrastructure can't expose that yet, so the guest
82 * takes but ignores it, goes to sleep and never gets notified that there's
83 * still an interrupt pending.
85 * As a quick workaround, let's just wake up again 20 ms after we injected
86 * an interrupt. That way we can assure that we're always reinjecting
87 * interrupts in case the guest swallowed them.
89 static QEMUTimer *idle_timer;
91 static void kvm_kick_cpu(void *opaque)
93 PowerPCCPU *cpu = opaque;
95 qemu_cpu_kick(CPU(cpu));
98 static int kvm_ppc_register_host_cpu_type(void);
100 int kvm_arch_init(MachineState *ms, KVMState *s)
102 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
103 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
104 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
105 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
106 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
107 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
108 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
109 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
110 cap_spapr_vfio = false;
111 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
112 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
113 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
114 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
115 /* Note: we don't set cap_papr here, because this capability is
116 * only activated after this by kvmppc_set_papr() */
117 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
118 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
120 if (!cap_interrupt_level) {
121 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
122 "VM to stall at times!\n");
125 kvm_ppc_register_host_cpu_type();
127 return 0;
130 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
132 CPUPPCState *cenv = &cpu->env;
133 CPUState *cs = CPU(cpu);
134 struct kvm_sregs sregs;
135 int ret;
137 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
138 /* What we're really trying to say is "if we're on BookE, we use
139 the native PVR for now". This is the only sane way to check
140 it though, so we potentially confuse users that they can run
141 BookE guests on BookS. Let's hope nobody dares enough :) */
142 return 0;
143 } else {
144 if (!cap_segstate) {
145 fprintf(stderr, "kvm error: missing PVR setting capability\n");
146 return -ENOSYS;
150 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
151 if (ret) {
152 return ret;
155 sregs.pvr = cenv->spr[SPR_PVR];
156 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
159 /* Set up a shared TLB array with KVM */
160 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
162 CPUPPCState *env = &cpu->env;
163 CPUState *cs = CPU(cpu);
164 struct kvm_book3e_206_tlb_params params = {};
165 struct kvm_config_tlb cfg = {};
166 unsigned int entries = 0;
167 int ret, i;
169 if (!kvm_enabled() ||
170 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
171 return 0;
174 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
176 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
177 params.tlb_sizes[i] = booke206_tlb_size(env, i);
178 params.tlb_ways[i] = booke206_tlb_ways(env, i);
179 entries += params.tlb_sizes[i];
182 assert(entries == env->nb_tlb);
183 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
185 env->tlb_dirty = true;
187 cfg.array = (uintptr_t)env->tlb.tlbm;
188 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
189 cfg.params = (uintptr_t)&params;
190 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
192 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
193 if (ret < 0) {
194 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
195 __func__, strerror(-ret));
196 return ret;
199 env->kvm_sw_tlb = true;
200 return 0;
204 #if defined(TARGET_PPC64)
205 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
206 struct kvm_ppc_smmu_info *info)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
211 memset(info, 0, sizeof(*info));
213 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
214 * need to "guess" what the supported page sizes are.
216 * For that to work we make a few assumptions:
218 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
219 * KVM which only supports 4K and 16M pages, but supports them
220 * regardless of the backing store characteritics. We also don't
221 * support 1T segments.
223 * This is safe as if HV KVM ever supports that capability or PR
224 * KVM grows supports for more page/segment sizes, those versions
225 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
226 * will not hit this fallback
228 * - Else we are running HV KVM. This means we only support page
229 * sizes that fit in the backing store. Additionally we only
230 * advertize 64K pages if the processor is ARCH 2.06 and we assume
231 * P7 encodings for the SLB and hash table. Here too, we assume
232 * support for any newer processor will mean a kernel that
233 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
234 * this fallback.
236 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
237 /* No flags */
238 info->flags = 0;
239 info->slb_size = 64;
241 /* Standard 4k base page size segment */
242 info->sps[0].page_shift = 12;
243 info->sps[0].slb_enc = 0;
244 info->sps[0].enc[0].page_shift = 12;
245 info->sps[0].enc[0].pte_enc = 0;
247 /* Standard 16M large page size segment */
248 info->sps[1].page_shift = 24;
249 info->sps[1].slb_enc = SLB_VSID_L;
250 info->sps[1].enc[0].page_shift = 24;
251 info->sps[1].enc[0].pte_enc = 0;
252 } else {
253 int i = 0;
255 /* HV KVM has backing store size restrictions */
256 info->flags = KVM_PPC_PAGE_SIZES_REAL;
258 if (env->mmu_model & POWERPC_MMU_1TSEG) {
259 info->flags |= KVM_PPC_1T_SEGMENTS;
262 if (env->mmu_model == POWERPC_MMU_2_06) {
263 info->slb_size = 32;
264 } else {
265 info->slb_size = 64;
268 /* Standard 4k base page size segment */
269 info->sps[i].page_shift = 12;
270 info->sps[i].slb_enc = 0;
271 info->sps[i].enc[0].page_shift = 12;
272 info->sps[i].enc[0].pte_enc = 0;
273 i++;
275 /* 64K on MMU 2.06 */
276 if (env->mmu_model == POWERPC_MMU_2_06) {
277 info->sps[i].page_shift = 16;
278 info->sps[i].slb_enc = 0x110;
279 info->sps[i].enc[0].page_shift = 16;
280 info->sps[i].enc[0].pte_enc = 1;
281 i++;
284 /* Standard 16M large page size segment */
285 info->sps[i].page_shift = 24;
286 info->sps[i].slb_enc = SLB_VSID_L;
287 info->sps[i].enc[0].page_shift = 24;
288 info->sps[i].enc[0].pte_enc = 0;
292 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
294 CPUState *cs = CPU(cpu);
295 int ret;
297 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
298 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
299 if (ret == 0) {
300 return;
304 kvm_get_fallback_smmu_info(cpu, info);
/*
 * Return the page size backing @mem_path: the filesystem block size when the
 * path lives on hugetlbfs, the normal host page size otherwise.
 *
 * Exits the process if statfs() fails for a reason other than EINTR.
 */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int rc;

    /* Retry for as long as the call is merely interrupted by a signal */
    for (;;) {
        rc = statfs(mem_path, &fs);
        if (rc == 0 || errno != EINTR) {
            break;
        }
    }

    if (rc != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type == HUGETLBFS_MAGIC) {
        /* It's hugepage, return the huge page size */
        return fs.f_bsize;
    }

    /* Explicit mempath, but it's ordinary pages */
    return getpagesize();
}
333 static int find_max_supported_pagesize(Object *obj, void *opaque)
335 char *mem_path;
336 long *hpsize_min = opaque;
338 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
339 mem_path = object_property_get_str(obj, "mem-path", NULL);
340 if (mem_path) {
341 long hpsize = gethugepagesize(mem_path);
342 if (hpsize < *hpsize_min) {
343 *hpsize_min = hpsize;
345 } else {
346 *hpsize_min = getpagesize();
350 return 0;
353 static long getrampagesize(void)
355 long hpsize = LONG_MAX;
356 Object *memdev_root;
358 if (mem_path) {
359 return gethugepagesize(mem_path);
362 /* it's possible we have memory-backend objects with
363 * hugepage-backed RAM. these may get mapped into system
364 * address space via -numa parameters or memory hotplug
365 * hooks. we want to take these into account, but we
366 * also want to make sure these supported hugepage
367 * sizes are applicable across the entire range of memory
368 * we may boot from, so we take the min across all
369 * backends, and assume normal pages in cases where a
370 * backend isn't backed by hugepages.
372 memdev_root = object_resolve_path("/objects", NULL);
373 if (!memdev_root) {
374 return getpagesize();
377 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
379 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
382 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
384 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
385 return true;
388 return (1ul << shift) <= rampgsize;
391 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
393 static struct kvm_ppc_smmu_info smmu_info;
394 static bool has_smmu_info;
395 CPUPPCState *env = &cpu->env;
396 long rampagesize;
397 int iq, ik, jq, jk;
399 /* We only handle page sizes for 64-bit server guests for now */
400 if (!(env->mmu_model & POWERPC_MMU_64)) {
401 return;
404 /* Collect MMU info from kernel if not already */
405 if (!has_smmu_info) {
406 kvm_get_smmu_info(cpu, &smmu_info);
407 has_smmu_info = true;
410 rampagesize = getrampagesize();
412 /* Convert to QEMU form */
413 memset(&env->sps, 0, sizeof(env->sps));
416 * XXX This loop should be an entry wide AND of the capabilities that
417 * the selected CPU has with the capabilities that KVM supports.
419 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
420 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
421 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
423 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
424 ksps->page_shift)) {
425 continue;
427 qsps->page_shift = ksps->page_shift;
428 qsps->slb_enc = ksps->slb_enc;
429 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
430 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
431 ksps->enc[jk].page_shift)) {
432 continue;
434 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
435 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
436 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
437 break;
440 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
441 break;
444 env->slb_nr = smmu_info.slb_size;
445 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
446 env->mmu_model &= ~POWERPC_MMU_1TSEG;
449 #else /* defined (TARGET_PPC64) */
451 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
455 #endif /* !defined (TARGET_PPC64) */
457 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
459 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
462 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
463 * book3s supports only 1 watchpoint, so array size
464 * of 4 is sufficient for now.
466 #define MAX_HW_BKPTS 4
468 static struct HWBreakpoint {
469 target_ulong addr;
470 int type;
471 } hw_debug_points[MAX_HW_BKPTS];
473 static CPUWatchpoint hw_watchpoint;
475 /* Default there is no breakpoint and watchpoint supported */
476 static int max_hw_breakpoint;
477 static int max_hw_watchpoint;
478 static int nb_hw_breakpoint;
479 static int nb_hw_watchpoint;
481 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
483 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
484 max_hw_breakpoint = 2;
485 max_hw_watchpoint = 2;
488 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
489 fprintf(stderr, "Error initializing h/w breakpoints\n");
490 return;
494 int kvm_arch_init_vcpu(CPUState *cs)
496 PowerPCCPU *cpu = POWERPC_CPU(cs);
497 CPUPPCState *cenv = &cpu->env;
498 int ret;
500 /* Gather server mmu info from KVM and update the CPU state */
501 kvm_fixup_page_sizes(cpu);
503 /* Synchronize sregs with kvm */
504 ret = kvm_arch_sync_sregs(cpu);
505 if (ret) {
506 return ret;
509 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
511 /* Some targets support access to KVM's guest TLB. */
512 switch (cenv->mmu_model) {
513 case POWERPC_MMU_BOOKE206:
514 ret = kvm_booke206_tlb_init(cpu);
515 break;
516 default:
517 break;
520 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
521 kvmppc_hw_debug_points_init(cenv);
523 return ret;
526 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
528 CPUPPCState *env = &cpu->env;
529 CPUState *cs = CPU(cpu);
530 struct kvm_dirty_tlb dirty_tlb;
531 unsigned char *bitmap;
532 int ret;
534 if (!env->kvm_sw_tlb) {
535 return;
538 bitmap = g_malloc((env->nb_tlb + 7) / 8);
539 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
541 dirty_tlb.bitmap = (uintptr_t)bitmap;
542 dirty_tlb.num_dirty = env->nb_tlb;
544 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
545 if (ret) {
546 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
547 __func__, strerror(-ret));
550 g_free(bitmap);
553 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
555 PowerPCCPU *cpu = POWERPC_CPU(cs);
556 CPUPPCState *env = &cpu->env;
557 union {
558 uint32_t u32;
559 uint64_t u64;
560 } val;
561 struct kvm_one_reg reg = {
562 .id = id,
563 .addr = (uintptr_t) &val,
565 int ret;
567 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
568 if (ret != 0) {
569 trace_kvm_failed_spr_get(spr, strerror(errno));
570 } else {
571 switch (id & KVM_REG_SIZE_MASK) {
572 case KVM_REG_SIZE_U32:
573 env->spr[spr] = val.u32;
574 break;
576 case KVM_REG_SIZE_U64:
577 env->spr[spr] = val.u64;
578 break;
580 default:
581 /* Don't handle this size yet */
582 abort();
587 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
589 PowerPCCPU *cpu = POWERPC_CPU(cs);
590 CPUPPCState *env = &cpu->env;
591 union {
592 uint32_t u32;
593 uint64_t u64;
594 } val;
595 struct kvm_one_reg reg = {
596 .id = id,
597 .addr = (uintptr_t) &val,
599 int ret;
601 switch (id & KVM_REG_SIZE_MASK) {
602 case KVM_REG_SIZE_U32:
603 val.u32 = env->spr[spr];
604 break;
606 case KVM_REG_SIZE_U64:
607 val.u64 = env->spr[spr];
608 break;
610 default:
611 /* Don't handle this size yet */
612 abort();
615 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
616 if (ret != 0) {
617 trace_kvm_failed_spr_set(spr, strerror(errno));
621 static int kvm_put_fp(CPUState *cs)
623 PowerPCCPU *cpu = POWERPC_CPU(cs);
624 CPUPPCState *env = &cpu->env;
625 struct kvm_one_reg reg;
626 int i;
627 int ret;
629 if (env->insns_flags & PPC_FLOAT) {
630 uint64_t fpscr = env->fpscr;
631 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
633 reg.id = KVM_REG_PPC_FPSCR;
634 reg.addr = (uintptr_t)&fpscr;
635 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
636 if (ret < 0) {
637 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
638 return ret;
641 for (i = 0; i < 32; i++) {
642 uint64_t vsr[2];
644 vsr[0] = float64_val(env->fpr[i]);
645 vsr[1] = env->vsr[i];
646 reg.addr = (uintptr_t) &vsr;
647 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
649 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
650 if (ret < 0) {
651 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
652 i, strerror(errno));
653 return ret;
658 if (env->insns_flags & PPC_ALTIVEC) {
659 reg.id = KVM_REG_PPC_VSCR;
660 reg.addr = (uintptr_t)&env->vscr;
661 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
662 if (ret < 0) {
663 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
664 return ret;
667 for (i = 0; i < 32; i++) {
668 reg.id = KVM_REG_PPC_VR(i);
669 reg.addr = (uintptr_t)&env->avr[i];
670 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
671 if (ret < 0) {
672 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
673 return ret;
678 return 0;
681 static int kvm_get_fp(CPUState *cs)
683 PowerPCCPU *cpu = POWERPC_CPU(cs);
684 CPUPPCState *env = &cpu->env;
685 struct kvm_one_reg reg;
686 int i;
687 int ret;
689 if (env->insns_flags & PPC_FLOAT) {
690 uint64_t fpscr;
691 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
693 reg.id = KVM_REG_PPC_FPSCR;
694 reg.addr = (uintptr_t)&fpscr;
695 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
696 if (ret < 0) {
697 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
698 return ret;
699 } else {
700 env->fpscr = fpscr;
703 for (i = 0; i < 32; i++) {
704 uint64_t vsr[2];
706 reg.addr = (uintptr_t) &vsr;
707 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
709 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
710 if (ret < 0) {
711 DPRINTF("Unable to get %s%d from KVM: %s\n",
712 vsx ? "VSR" : "FPR", i, strerror(errno));
713 return ret;
714 } else {
715 env->fpr[i] = vsr[0];
716 if (vsx) {
717 env->vsr[i] = vsr[1];
723 if (env->insns_flags & PPC_ALTIVEC) {
724 reg.id = KVM_REG_PPC_VSCR;
725 reg.addr = (uintptr_t)&env->vscr;
726 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
727 if (ret < 0) {
728 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
729 return ret;
732 for (i = 0; i < 32; i++) {
733 reg.id = KVM_REG_PPC_VR(i);
734 reg.addr = (uintptr_t)&env->avr[i];
735 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
736 if (ret < 0) {
737 DPRINTF("Unable to get VR%d from KVM: %s\n",
738 i, strerror(errno));
739 return ret;
744 return 0;
747 #if defined(TARGET_PPC64)
748 static int kvm_get_vpa(CPUState *cs)
750 PowerPCCPU *cpu = POWERPC_CPU(cs);
751 CPUPPCState *env = &cpu->env;
752 struct kvm_one_reg reg;
753 int ret;
755 reg.id = KVM_REG_PPC_VPA_ADDR;
756 reg.addr = (uintptr_t)&env->vpa_addr;
757 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
758 if (ret < 0) {
759 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
760 return ret;
763 assert((uintptr_t)&env->slb_shadow_size
764 == ((uintptr_t)&env->slb_shadow_addr + 8));
765 reg.id = KVM_REG_PPC_VPA_SLB;
766 reg.addr = (uintptr_t)&env->slb_shadow_addr;
767 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
768 if (ret < 0) {
769 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
770 strerror(errno));
771 return ret;
774 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
775 reg.id = KVM_REG_PPC_VPA_DTL;
776 reg.addr = (uintptr_t)&env->dtl_addr;
777 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
778 if (ret < 0) {
779 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
780 strerror(errno));
781 return ret;
784 return 0;
787 static int kvm_put_vpa(CPUState *cs)
789 PowerPCCPU *cpu = POWERPC_CPU(cs);
790 CPUPPCState *env = &cpu->env;
791 struct kvm_one_reg reg;
792 int ret;
794 /* SLB shadow or DTL can't be registered unless a master VPA is
795 * registered. That means when restoring state, if a VPA *is*
796 * registered, we need to set that up first. If not, we need to
797 * deregister the others before deregistering the master VPA */
798 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
800 if (env->vpa_addr) {
801 reg.id = KVM_REG_PPC_VPA_ADDR;
802 reg.addr = (uintptr_t)&env->vpa_addr;
803 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
804 if (ret < 0) {
805 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
806 return ret;
810 assert((uintptr_t)&env->slb_shadow_size
811 == ((uintptr_t)&env->slb_shadow_addr + 8));
812 reg.id = KVM_REG_PPC_VPA_SLB;
813 reg.addr = (uintptr_t)&env->slb_shadow_addr;
814 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
815 if (ret < 0) {
816 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
817 return ret;
820 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
821 reg.id = KVM_REG_PPC_VPA_DTL;
822 reg.addr = (uintptr_t)&env->dtl_addr;
823 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
824 if (ret < 0) {
825 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
826 strerror(errno));
827 return ret;
830 if (!env->vpa_addr) {
831 reg.id = KVM_REG_PPC_VPA_ADDR;
832 reg.addr = (uintptr_t)&env->vpa_addr;
833 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
834 if (ret < 0) {
835 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
836 return ret;
840 return 0;
842 #endif /* TARGET_PPC64 */
844 int kvm_arch_put_registers(CPUState *cs, int level)
846 PowerPCCPU *cpu = POWERPC_CPU(cs);
847 CPUPPCState *env = &cpu->env;
848 struct kvm_regs regs;
849 int ret;
850 int i;
852 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
853 if (ret < 0) {
854 return ret;
857 regs.ctr = env->ctr;
858 regs.lr = env->lr;
859 regs.xer = cpu_read_xer(env);
860 regs.msr = env->msr;
861 regs.pc = env->nip;
863 regs.srr0 = env->spr[SPR_SRR0];
864 regs.srr1 = env->spr[SPR_SRR1];
866 regs.sprg0 = env->spr[SPR_SPRG0];
867 regs.sprg1 = env->spr[SPR_SPRG1];
868 regs.sprg2 = env->spr[SPR_SPRG2];
869 regs.sprg3 = env->spr[SPR_SPRG3];
870 regs.sprg4 = env->spr[SPR_SPRG4];
871 regs.sprg5 = env->spr[SPR_SPRG5];
872 regs.sprg6 = env->spr[SPR_SPRG6];
873 regs.sprg7 = env->spr[SPR_SPRG7];
875 regs.pid = env->spr[SPR_BOOKE_PID];
877 for (i = 0;i < 32; i++)
878 regs.gpr[i] = env->gpr[i];
880 regs.cr = 0;
881 for (i = 0; i < 8; i++) {
882 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
885 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
886 if (ret < 0)
887 return ret;
889 kvm_put_fp(cs);
891 if (env->tlb_dirty) {
892 kvm_sw_tlb_put(cpu);
893 env->tlb_dirty = false;
896 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
897 struct kvm_sregs sregs;
899 sregs.pvr = env->spr[SPR_PVR];
901 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
903 /* Sync SLB */
904 #ifdef TARGET_PPC64
905 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
906 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
907 if (env->slb[i].esid & SLB_ESID_V) {
908 sregs.u.s.ppc64.slb[i].slbe |= i;
910 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
912 #endif
914 /* Sync SRs */
915 for (i = 0; i < 16; i++) {
916 sregs.u.s.ppc32.sr[i] = env->sr[i];
919 /* Sync BATs */
920 for (i = 0; i < 8; i++) {
921 /* Beware. We have to swap upper and lower bits here */
922 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
923 | env->DBAT[1][i];
924 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
925 | env->IBAT[1][i];
928 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
929 if (ret) {
930 return ret;
934 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
935 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
938 if (cap_one_reg) {
939 int i;
941 /* We deliberately ignore errors here, for kernels which have
942 * the ONE_REG calls, but don't support the specific
943 * registers, there's a reasonable chance things will still
944 * work, at least until we try to migrate. */
945 for (i = 0; i < 1024; i++) {
946 uint64_t id = env->spr_cb[i].one_reg_id;
948 if (id != 0) {
949 kvm_put_one_spr(cs, id, i);
953 #ifdef TARGET_PPC64
954 if (msr_ts) {
955 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
956 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
958 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
959 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
961 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
962 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
963 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
964 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
965 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
966 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
967 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
968 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
969 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
970 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
973 if (cap_papr) {
974 if (kvm_put_vpa(cs) < 0) {
975 DPRINTF("Warning: Unable to set VPA information to KVM\n");
979 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
980 #endif /* TARGET_PPC64 */
983 return ret;
986 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
988 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
991 int kvm_arch_get_registers(CPUState *cs)
993 PowerPCCPU *cpu = POWERPC_CPU(cs);
994 CPUPPCState *env = &cpu->env;
995 struct kvm_regs regs;
996 struct kvm_sregs sregs;
997 uint32_t cr;
998 int i, ret;
1000 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1001 if (ret < 0)
1002 return ret;
1004 cr = regs.cr;
1005 for (i = 7; i >= 0; i--) {
1006 env->crf[i] = cr & 15;
1007 cr >>= 4;
1010 env->ctr = regs.ctr;
1011 env->lr = regs.lr;
1012 cpu_write_xer(env, regs.xer);
1013 env->msr = regs.msr;
1014 env->nip = regs.pc;
1016 env->spr[SPR_SRR0] = regs.srr0;
1017 env->spr[SPR_SRR1] = regs.srr1;
1019 env->spr[SPR_SPRG0] = regs.sprg0;
1020 env->spr[SPR_SPRG1] = regs.sprg1;
1021 env->spr[SPR_SPRG2] = regs.sprg2;
1022 env->spr[SPR_SPRG3] = regs.sprg3;
1023 env->spr[SPR_SPRG4] = regs.sprg4;
1024 env->spr[SPR_SPRG5] = regs.sprg5;
1025 env->spr[SPR_SPRG6] = regs.sprg6;
1026 env->spr[SPR_SPRG7] = regs.sprg7;
1028 env->spr[SPR_BOOKE_PID] = regs.pid;
1030 for (i = 0;i < 32; i++)
1031 env->gpr[i] = regs.gpr[i];
1033 kvm_get_fp(cs);
1035 if (cap_booke_sregs) {
1036 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1037 if (ret < 0) {
1038 return ret;
1041 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1042 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1043 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1044 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1045 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1046 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1047 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1048 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1049 env->spr[SPR_DECR] = sregs.u.e.dec;
1050 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1051 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1052 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1055 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1056 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1057 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1058 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1059 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1060 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1063 if (sregs.u.e.features & KVM_SREGS_E_64) {
1064 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1067 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1068 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1071 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1072 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1073 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1074 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1075 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1076 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1077 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1078 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1079 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1080 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1081 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1082 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1083 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1084 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1085 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1086 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1087 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1088 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1089 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1090 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1091 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1092 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1093 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1094 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1095 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1096 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1097 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1098 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1099 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1100 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1101 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1102 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1103 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1105 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1106 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1107 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1108 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1109 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1110 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1111 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1114 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1115 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1116 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1119 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1120 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1121 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1122 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1123 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1127 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1128 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1129 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1130 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1131 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1132 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1133 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1134 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1135 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1136 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1137 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1140 if (sregs.u.e.features & KVM_SREGS_EXP) {
1141 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1144 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1145 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1146 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1149 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1150 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1151 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1152 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1154 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1155 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1156 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1161 if (cap_segstate) {
1162 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1163 if (ret < 0) {
1164 return ret;
1167 if (!env->external_htab) {
1168 ppc_store_sdr1(env, sregs.u.s.sdr1);
1171 /* Sync SLB */
1172 #ifdef TARGET_PPC64
1174 * The packed SLB array we get from KVM_GET_SREGS only contains
1175 * information about valid entries. So we flush our internal
1176 * copy to get rid of stale ones, then put all valid SLB entries
1177 * back in.
1179 memset(env->slb, 0, sizeof(env->slb));
1180 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1181 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1182 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1184 * Only restore valid entries
1186 if (rb & SLB_ESID_V) {
1187 ppc_store_slb(env, rb, rs);
1190 #endif
1192 /* Sync SRs */
1193 for (i = 0; i < 16; i++) {
1194 env->sr[i] = sregs.u.s.ppc32.sr[i];
1197 /* Sync BATs */
1198 for (i = 0; i < 8; i++) {
1199 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1200 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1201 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1202 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1206 if (cap_hior) {
1207 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1210 if (cap_one_reg) {
1211 int i;
1213 /* We deliberately ignore errors here, for kernels which have
1214 * the ONE_REG calls, but don't support the specific
1215 * registers, there's a reasonable chance things will still
1216 * work, at least until we try to migrate. */
1217 for (i = 0; i < 1024; i++) {
1218 uint64_t id = env->spr_cb[i].one_reg_id;
1220 if (id != 0) {
1221 kvm_get_one_spr(cs, id, i);
1225 #ifdef TARGET_PPC64
1226 if (msr_ts) {
1227 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1228 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1230 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1231 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1233 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1234 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1235 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1236 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1237 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1238 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1239 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1240 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1241 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1242 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1245 if (cap_papr) {
1246 if (kvm_get_vpa(cs) < 0) {
1247 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1251 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1252 #endif
1255 return 0;
1258 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1260 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1262 if (irq != PPC_INTERRUPT_EXT) {
1263 return 0;
1266 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1267 return 0;
1270 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1272 return 0;
/* PPC_INPUT_INT aliases the per-family *_INPUT_INT constant of the target. */
#ifdef TARGET_PPCEMB
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1283 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1285 PowerPCCPU *cpu = POWERPC_CPU(cs);
1286 CPUPPCState *env = &cpu->env;
1287 int r;
1288 unsigned irq;
1290 qemu_mutex_lock_iothread();
1292 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1293 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1294 if (!cap_interrupt_level &&
1295 run->ready_for_interrupt_injection &&
1296 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1297 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1299 /* For now KVM disregards the 'irq' argument. However, in the
1300 * future KVM could cache it in-kernel to avoid a heavyweight exit
1301 * when reading the UIC.
1303 irq = KVM_INTERRUPT_SET;
1305 DPRINTF("injected interrupt %d\n", irq);
1306 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1307 if (r < 0) {
1308 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1311 /* Always wake up soon in case the interrupt was level based */
1312 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1313 (get_ticks_per_sec() / 50));
1316 /* We don't know if there are more interrupts pending after this. However,
1317 * the guest will return to userspace in the course of handling this one
1318 * anyways, so we will get a chance to deliver the rest. */
1320 qemu_mutex_unlock_iothread();
1323 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1325 return MEMTXATTRS_UNSPECIFIED;
1328 int kvm_arch_process_async_events(CPUState *cs)
1330 return cs->halted;
1333 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1335 CPUState *cs = CPU(cpu);
1336 CPUPPCState *env = &cpu->env;
1338 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1339 cs->halted = 1;
1340 cs->exception_index = EXCP_HLT;
1343 return 0;
1346 /* map dcr access to existing qemu dcr emulation */
1347 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1349 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1350 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1352 return 0;
1355 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1357 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1358 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1360 return 0;
1363 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1365 /* Mixed endian case is not handled */
1366 uint32_t sc = debug_inst_opcode;
1368 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1369 sizeof(sc), 0) ||
1370 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1371 return -EINVAL;
1374 return 0;
1377 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1379 uint32_t sc;
1381 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1382 sc != debug_inst_opcode ||
1383 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1384 sizeof(sc), 1)) {
1385 return -EINVAL;
1388 return 0;
1391 static int find_hw_breakpoint(target_ulong addr, int type)
1393 int n;
1395 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1396 <= ARRAY_SIZE(hw_debug_points));
1398 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1399 if (hw_debug_points[n].addr == addr &&
1400 hw_debug_points[n].type == type) {
1401 return n;
1405 return -1;
1408 static int find_hw_watchpoint(target_ulong addr, int *flag)
1410 int n;
1412 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1413 if (n >= 0) {
1414 *flag = BP_MEM_ACCESS;
1415 return n;
1418 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1419 if (n >= 0) {
1420 *flag = BP_MEM_WRITE;
1421 return n;
1424 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1425 if (n >= 0) {
1426 *flag = BP_MEM_READ;
1427 return n;
1430 return -1;
1433 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1434 target_ulong len, int type)
1436 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1437 return -ENOBUFS;
1440 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1441 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1443 switch (type) {
1444 case GDB_BREAKPOINT_HW:
1445 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1446 return -ENOBUFS;
1449 if (find_hw_breakpoint(addr, type) >= 0) {
1450 return -EEXIST;
1453 nb_hw_breakpoint++;
1454 break;
1456 case GDB_WATCHPOINT_WRITE:
1457 case GDB_WATCHPOINT_READ:
1458 case GDB_WATCHPOINT_ACCESS:
1459 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1460 return -ENOBUFS;
1463 if (find_hw_breakpoint(addr, type) >= 0) {
1464 return -EEXIST;
1467 nb_hw_watchpoint++;
1468 break;
1470 default:
1471 return -ENOSYS;
1474 return 0;
1477 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1478 target_ulong len, int type)
1480 int n;
1482 n = find_hw_breakpoint(addr, type);
1483 if (n < 0) {
1484 return -ENOENT;
1487 switch (type) {
1488 case GDB_BREAKPOINT_HW:
1489 nb_hw_breakpoint--;
1490 break;
1492 case GDB_WATCHPOINT_WRITE:
1493 case GDB_WATCHPOINT_READ:
1494 case GDB_WATCHPOINT_ACCESS:
1495 nb_hw_watchpoint--;
1496 break;
1498 default:
1499 return -ENOSYS;
1501 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1503 return 0;
1506 void kvm_arch_remove_all_hw_breakpoints(void)
1508 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1511 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1513 int n;
1515 /* Software Breakpoint updates */
1516 if (kvm_sw_breakpoints_active(cs)) {
1517 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1520 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1521 <= ARRAY_SIZE(hw_debug_points));
1522 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1524 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1525 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1526 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1527 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1528 switch (hw_debug_points[n].type) {
1529 case GDB_BREAKPOINT_HW:
1530 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1531 break;
1532 case GDB_WATCHPOINT_WRITE:
1533 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1534 break;
1535 case GDB_WATCHPOINT_READ:
1536 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1537 break;
1538 case GDB_WATCHPOINT_ACCESS:
1539 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1540 KVMPPC_DEBUG_WATCH_READ;
1541 break;
1542 default:
1543 cpu_abort(cs, "Unsupported breakpoint type\n");
1545 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1550 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1552 CPUState *cs = CPU(cpu);
1553 CPUPPCState *env = &cpu->env;
1554 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1555 int handle = 0;
1556 int n;
1557 int flag = 0;
1559 if (cs->singlestep_enabled) {
1560 handle = 1;
1561 } else if (arch_info->status) {
1562 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1563 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1564 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1565 if (n >= 0) {
1566 handle = 1;
1568 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1569 KVMPPC_DEBUG_WATCH_WRITE)) {
1570 n = find_hw_watchpoint(arch_info->address, &flag);
1571 if (n >= 0) {
1572 handle = 1;
1573 cs->watchpoint_hit = &hw_watchpoint;
1574 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1575 hw_watchpoint.flags = flag;
1579 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1580 handle = 1;
1581 } else {
1582 /* QEMU is not able to handle debug exception, so inject
1583 * program exception to guest;
1584 * Yes program exception NOT debug exception !!
1585 * When QEMU is using debug resources then debug exception must
1586 * be always set. To achieve this we set MSR_DE and also set
1587 * MSRP_DEP so guest cannot change MSR_DE.
1588 * When emulating debug resource for guest we want guest
1589 * to control MSR_DE (enable/disable debug interrupt on need).
1590 * Supporting both configurations are NOT possible.
1591 * So the result is that we cannot share debug resources
1592 * between QEMU and Guest on BOOKE architecture.
1593 * In the current design QEMU gets the priority over guest,
1594 * this means that if QEMU is using debug resources then guest
1595 * cannot use them;
1596 * For software breakpoint QEMU uses a privileged instruction;
1597 * So there cannot be any reason that we are here for guest
1598 * set debug exception, only possibility is guest executed a
1599 * privileged / illegal instruction and that's why we are
1600 * injecting a program interrupt.
1603 cpu_synchronize_state(cs);
1604 /* env->nip is PC, so increment this by 4 to use
1605 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1607 env->nip += 4;
1608 cs->exception_index = POWERPC_EXCP_PROGRAM;
1609 env->error_code = POWERPC_EXCP_INVAL;
1610 ppc_cpu_do_interrupt(cs);
1613 return handle;
1616 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1618 PowerPCCPU *cpu = POWERPC_CPU(cs);
1619 CPUPPCState *env = &cpu->env;
1620 int ret;
1622 qemu_mutex_lock_iothread();
1624 switch (run->exit_reason) {
1625 case KVM_EXIT_DCR:
1626 if (run->dcr.is_write) {
1627 DPRINTF("handle dcr write\n");
1628 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1629 } else {
1630 DPRINTF("handle dcr read\n");
1631 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1633 break;
1634 case KVM_EXIT_HLT:
1635 DPRINTF("handle halt\n");
1636 ret = kvmppc_handle_halt(cpu);
1637 break;
1638 #if defined(TARGET_PPC64)
1639 case KVM_EXIT_PAPR_HCALL:
1640 DPRINTF("handle PAPR hypercall\n");
1641 run->papr_hcall.ret = spapr_hypercall(cpu,
1642 run->papr_hcall.nr,
1643 run->papr_hcall.args);
1644 ret = 0;
1645 break;
1646 #endif
1647 case KVM_EXIT_EPR:
1648 DPRINTF("handle epr\n");
1649 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1650 ret = 0;
1651 break;
1652 case KVM_EXIT_WATCHDOG:
1653 DPRINTF("handle watchdog expiry\n");
1654 watchdog_perform_action();
1655 ret = 0;
1656 break;
1658 case KVM_EXIT_DEBUG:
1659 DPRINTF("handle debug exception\n");
1660 if (kvm_handle_debug(cpu, run)) {
1661 ret = EXCP_DEBUG;
1662 break;
1664 /* re-enter, this exception was guest-internal */
1665 ret = 0;
1666 break;
1668 default:
1669 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1670 ret = -1;
1671 break;
1674 qemu_mutex_unlock_iothread();
1675 return ret;
1678 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1680 CPUState *cs = CPU(cpu);
1681 uint32_t bits = tsr_bits;
1682 struct kvm_one_reg reg = {
1683 .id = KVM_REG_PPC_OR_TSR,
1684 .addr = (uintptr_t) &bits,
1687 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1690 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1693 CPUState *cs = CPU(cpu);
1694 uint32_t bits = tsr_bits;
1695 struct kvm_one_reg reg = {
1696 .id = KVM_REG_PPC_CLEAR_TSR,
1697 .addr = (uintptr_t) &bits,
1700 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1703 int kvmppc_set_tcr(PowerPCCPU *cpu)
1705 CPUState *cs = CPU(cpu);
1706 CPUPPCState *env = &cpu->env;
1707 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1709 struct kvm_one_reg reg = {
1710 .id = KVM_REG_PPC_TCR,
1711 .addr = (uintptr_t) &tcr,
1714 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1717 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1719 CPUState *cs = CPU(cpu);
1720 int ret;
1722 if (!kvm_enabled()) {
1723 return -1;
1726 if (!cap_ppc_watchdog) {
1727 printf("warning: KVM does not support watchdog");
1728 return -1;
1731 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1732 if (ret < 0) {
1733 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1734 __func__, strerror(-ret));
1735 return ret;
1738 return ret;
/*
 * Copy the first line of /proc/cpuinfo that starts with 'field' into
 * 'value' (at most 'len' bytes, via pstrcpy).
 * Returns 0 when such a line was found, -1 otherwise.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int found = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (!fp) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* Stop on a line that reads back as an empty string. */
        if (!*line) {
            break;
        }
    }

    fclose(fp);

    return found;
}
/*
 * Return the host timebase frequency parsed from the "timebase" line of
 * /proc/cpuinfo.
 *
 * Falls back to get_ticks_per_sec() when the line is missing, has no ':'
 * separator, or its value is not a non-zero decimal number that fits in
 * 32 bits.  (The value used to be parsed with atoi(), which cannot report
 * errors and yields 0 for garbage.)
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    errno = 0;
    freq = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || freq == 0 || freq > UINT32_MAX) {
        /* Unparseable or implausible value: keep the default. */
        return retval;
    }

    return (uint32_t)freq;
}
/* Read /proc/device-tree/system-id into a newly allocated *value.
 * Returns the result of g_file_get_contents(). */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
/* Read /proc/device-tree/model into a newly allocated *value.
 * Returns the result of g_file_get_contents(). */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1800 /* Try to find a device tree node for a CPU with clock-frequency property */
1801 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1803 struct dirent *dirp;
1804 DIR *dp;
1806 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1807 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1808 return -1;
1811 buf[0] = '\0';
1812 while ((dirp = readdir(dp)) != NULL) {
1813 FILE *f;
1814 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1815 dirp->d_name);
1816 f = fopen(buf, "r");
1817 if (f) {
1818 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1819 fclose(f);
1820 break;
1822 buf[0] = '\0';
1824 closedir(dp);
1825 if (buf[0] == '\0') {
1826 printf("Unknown host!\n");
1827 return -1;
1830 return 0;
1833 /* Read a CPU node property from the host device tree that's a single
1834 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1835 * (can't find or open the property, or doesn't understand the
1836 * format) */
1837 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1839 char buf[PATH_MAX], *tmp;
1840 union {
1841 uint32_t v32;
1842 uint64_t v64;
1843 } u;
1844 FILE *f;
1845 int len;
1847 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1848 return -1;
1851 tmp = g_strdup_printf("%s/%s", buf, propname);
1853 f = fopen(tmp, "rb");
1854 g_free(tmp);
1855 if (!f) {
1856 return -1;
1859 len = fread(&u, 1, sizeof(u), f);
1860 fclose(f);
1861 switch (len) {
1862 case 4:
1863 /* property is a 32-bit quantity */
1864 return be32_to_cpu(u.v32);
1865 case 8:
1866 return be64_to_cpu(u.v64);
1869 return 0;
/* Host CPU "clock-frequency" device tree property, as read by
 * kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Host CPU "ibm,vmx" device tree property, truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
/* Host CPU "ibm,dfp" device tree property, truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
1887 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1889 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1890 CPUState *cs = CPU(cpu);
1892 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1893 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1894 return 0;
1897 return 1;
1900 int kvmppc_get_hasidle(CPUPPCState *env)
1902 struct kvm_ppc_pvinfo pvinfo;
1904 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1905 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1906 return 1;
1909 return 0;
1912 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1914 uint32_t *hc = (uint32_t*)buf;
1915 struct kvm_ppc_pvinfo pvinfo;
1917 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1918 memcpy(buf, pvinfo.hcall, buf_len);
1919 return 0;
1923 * Fallback to always fail hypercalls regardless of endianness:
1925 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1926 * li r3, -1
1927 * b .+8 (becomes nop in wrong endian)
1928 * bswap32(li r3, -1)
1931 hc[0] = cpu_to_be32(0x08000048);
1932 hc[1] = cpu_to_be32(0x3860ffff);
1933 hc[2] = cpu_to_be32(0x48000008);
1934 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1936 return 0;
1939 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1941 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1944 void kvmppc_enable_logical_ci_hcalls(void)
1947 * FIXME: it would be nice if we could detect the cases where
1948 * we're using a device which requires the in kernel
1949 * implementation of these hcalls, but the kernel lacks them and
1950 * produce a warning.
1952 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1953 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1956 void kvmppc_enable_set_mode_hcall(void)
1958 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1961 void kvmppc_set_papr(PowerPCCPU *cpu)
1963 CPUState *cs = CPU(cpu);
1964 int ret;
1966 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1967 if (ret) {
1968 cpu_abort(cs, "This KVM version does not support PAPR\n");
1971 /* Update the capability flag so we sync the right information
1972 * with kvm */
1973 cap_papr = 1;
1976 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1978 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1981 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1983 CPUState *cs = CPU(cpu);
1984 int ret;
1986 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1987 if (ret && mpic_proxy) {
1988 cpu_abort(cs, "This KVM version does not support EPR\n");
1992 int kvmppc_smt_threads(void)
1994 return cap_ppc_smt ? cap_ppc_smt : 1;
1997 #ifdef TARGET_PPC64
1998 off_t kvmppc_alloc_rma(void **rma)
2000 off_t size;
2001 int fd;
2002 struct kvm_allocate_rma ret;
2004 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2005 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2006 * not necessary on this hardware
2007 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2009 * FIXME: We should allow the user to force contiguous RMA
2010 * allocation in the cap_ppc_rma==1 case.
2012 if (cap_ppc_rma < 2) {
2013 return 0;
2016 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2017 if (fd < 0) {
2018 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2019 strerror(errno));
2020 return -1;
2023 size = MIN(ret.rma_size, 256ul << 20);
2025 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2026 if (*rma == MAP_FAILED) {
2027 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2028 return -1;
2031 return size;
2034 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2036 struct kvm_ppc_smmu_info info;
2037 long rampagesize, best_page_shift;
2038 int i;
2040 if (cap_ppc_rma >= 2) {
2041 return current_size;
2044 /* Find the largest hardware supported page size that's less than
2045 * or equal to the (logical) backing page size of guest RAM */
2046 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2047 rampagesize = getrampagesize();
2048 best_page_shift = 0;
2050 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2051 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2053 if (!sps->page_shift) {
2054 continue;
2057 if ((sps->page_shift > best_page_shift)
2058 && ((1UL << sps->page_shift) <= rampagesize)) {
2059 best_page_shift = sps->page_shift;
2063 return MIN(current_size,
2064 1ULL << (best_page_shift + hash_shift - 7));
2066 #endif
2068 bool kvmppc_spapr_use_multitce(void)
2070 return cap_spapr_multitce;
2073 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2074 bool vfio_accel)
2076 struct kvm_create_spapr_tce args = {
2077 .liobn = liobn,
2078 .window_size = window_size,
2080 long len;
2081 int fd;
2082 void *table;
2084 /* Must set fd to -1 so we don't try to munmap when called for
2085 * destroying the table, which the upper layers -will- do
2087 *pfd = -1;
2088 if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
2089 return NULL;
2092 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2093 if (fd < 0) {
2094 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2095 liobn);
2096 return NULL;
2099 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2100 /* FIXME: round this up to page size */
2102 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2103 if (table == MAP_FAILED) {
2104 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2105 liobn);
2106 close(fd);
2107 return NULL;
2110 *pfd = fd;
2111 return table;
/*
 * Unmap and close an in-kernel TCE table of nb_table entries.
 * Returns -1 if fd is negative, 0 otherwise.  Unmap/close errors are
 * only reported on stderr.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long map_len;

    if (fd < 0) {
        return -1;
    }

    map_len = nb_table * sizeof(uint64_t);
    if (munmap(table, map_len) < 0 || close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2133 int kvmppc_reset_htab(int shift_hint)
2135 uint32_t shift = shift_hint;
2137 if (!kvm_enabled()) {
2138 /* Full emulation, tell caller to allocate htab itself */
2139 return 0;
2141 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2142 int ret;
2143 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2144 if (ret == -ENOTTY) {
2145 /* At least some versions of PR KVM advertise the
2146 * capability, but don't implement the ioctl(). Oops.
2147 * Return 0 so that we allocate the htab in qemu, as is
2148 * correct for PR. */
2149 return 0;
2150 } else if (ret < 0) {
2151 return ret;
2153 return shift;
2156 /* We have a kernel that predates the htab reset calls. For PR
2157 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2158 * this era, it has allocated a 16MB fixed size hash table
2159 * already. Kernels of this era have the GET_PVINFO capability
2160 * only on PR, so we use this hack to determine the right
2161 * answer */
2162 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2163 /* PR - tell caller to allocate htab */
2164 return 0;
2165 } else {
2166 /* HV - assume 16MB kernel allocated htab */
2167 return 24;
2171 static inline uint32_t mfpvr(void)
2173 uint32_t pvr;
2175 asm ("mfpvr %0"
2176 : "=r"(pvr));
2177 return pvr;
/* Set (on == true) or clear (on == false) the 'flags' bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2189 static void kvmppc_host_cpu_initfn(Object *obj)
2191 assert(kvm_enabled());
2194 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2196 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2197 uint32_t vmx = kvmppc_get_vmx();
2198 uint32_t dfp = kvmppc_get_dfp();
2199 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2200 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2202 /* Now fix up the class with information we can query from the host */
2203 pcc->pvr = mfpvr();
2205 if (vmx != -1) {
2206 /* Only override when we know what the host supports */
2207 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2208 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2210 if (dfp != -1) {
2211 /* Only override when we know what the host supports */
2212 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2215 if (dcache_size != -1) {
2216 pcc->l1_dcache_size = dcache_size;
2219 if (icache_size != -1) {
2220 pcc->l1_icache_size = icache_size;
2224 bool kvmppc_has_cap_epr(void)
2226 return cap_epr;
2229 bool kvmppc_has_cap_htab_fd(void)
2231 return cap_htab_fd;
2234 bool kvmppc_has_cap_fixup_hcalls(void)
2236 return cap_fixup_hcalls;
2239 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2241 ObjectClass *oc = OBJECT_CLASS(pcc);
2243 while (oc && !object_class_is_abstract(oc)) {
2244 oc = object_class_get_parent(oc);
2246 assert(oc);
2248 return POWERPC_CPU_CLASS(oc);
2251 static int kvm_ppc_register_host_cpu_type(void)
2253 TypeInfo type_info = {
2254 .name = TYPE_HOST_POWERPC_CPU,
2255 .instance_init = kvmppc_host_cpu_initfn,
2256 .class_init = kvmppc_host_cpu_class_init,
2258 uint32_t host_pvr = mfpvr();
2259 PowerPCCPUClass *pvr_pcc;
2260 DeviceClass *dc;
2262 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2263 if (pvr_pcc == NULL) {
2264 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2266 if (pvr_pcc == NULL) {
2267 return -1;
2269 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2270 type_register(&type_info);
2272 /* Register generic family CPU class for a family */
2273 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2274 dc = DEVICE_CLASS(pvr_pcc);
2275 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2276 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2277 type_register(&type_info);
2279 return 0;
2282 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2284 struct kvm_rtas_token_args args = {
2285 .token = token,
2288 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2289 return -ENOENT;
2292 strncpy(args.name, function, sizeof(args.name));
2294 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2297 int kvmppc_get_htab_fd(bool write)
2299 struct kvm_get_htab_fd s = {
2300 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2301 .start_index = 0,
2304 if (!cap_htab_fd) {
2305 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2306 return -1;
2309 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2312 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2314 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2315 uint8_t buf[bufsize];
2316 ssize_t rc;
2318 do {
2319 rc = read(fd, buf, bufsize);
2320 if (rc < 0) {
2321 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2322 strerror(errno));
2323 return rc;
2324 } else if (rc) {
2325 uint8_t *buffer = buf;
2326 ssize_t n = rc;
2327 while (n) {
2328 struct kvm_get_htab_header *head =
2329 (struct kvm_get_htab_header *) buffer;
2330 size_t chunksize = sizeof(*head) +
2331 HASH_PTE_SIZE_64 * head->n_valid;
2333 qemu_put_be32(f, head->index);
2334 qemu_put_be16(f, head->n_valid);
2335 qemu_put_be16(f, head->n_invalid);
2336 qemu_put_buffer(f, (void *)(head + 1),
2337 HASH_PTE_SIZE_64 * head->n_valid);
2339 buffer += chunksize;
2340 n -= chunksize;
2343 } while ((rc != 0)
2344 && ((max_ns < 0)
2345 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2347 return (rc == 0) ? 1 : 0;
2350 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2351 uint16_t n_valid, uint16_t n_invalid)
2353 struct kvm_get_htab_header *buf;
2354 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2355 ssize_t rc;
2357 buf = alloca(chunksize);
2358 buf->index = index;
2359 buf->n_valid = n_valid;
2360 buf->n_invalid = n_invalid;
2362 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2364 rc = write(fd, buf, chunksize);
2365 if (rc < 0) {
2366 fprintf(stderr, "Error writing KVM hash table: %s\n",
2367 strerror(errno));
2368 return rc;
2370 if (rc != chunksize) {
2371 /* We should never get a short write on a single chunk */
2372 fprintf(stderr, "Short write, restoring KVM hash table\n");
2373 return -1;
2375 return 0;
2378 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2380 return true;
2383 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2385 return 1;
/* SIGBUS hook: no handling is done here, always returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;
    return 1;
}
2393 void kvm_arch_init_irq_routing(KVMState *s)
2397 struct kvm_get_htab_buf {
2398 struct kvm_get_htab_header header;
2400 * We require one extra byte for read
2402 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2405 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2407 int htab_fd;
2408 struct kvm_get_htab_fd ghf;
2409 struct kvm_get_htab_buf *hpte_buf;
2411 ghf.flags = 0;
2412 ghf.start_index = pte_index;
2413 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2414 if (htab_fd < 0) {
2415 goto error_out;
2418 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2420 * Read the hpte group
2422 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2423 goto out_close;
2426 close(htab_fd);
2427 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2429 out_close:
2430 g_free(hpte_buf);
2431 close(htab_fd);
2432 error_out:
2433 return 0;
2436 void kvmppc_hash64_free_pteg(uint64_t token)
2438 struct kvm_get_htab_buf *htab_buf;
2440 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2441 hpte);
2442 g_free(htab_buf);
2443 return;
2446 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2447 target_ulong pte0, target_ulong pte1)
2449 int htab_fd;
2450 struct kvm_get_htab_fd ghf;
2451 struct kvm_get_htab_buf hpte_buf;
2453 ghf.flags = 0;
2454 ghf.start_index = 0; /* Ignored */
2455 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2456 if (htab_fd < 0) {
2457 goto error_out;
2460 hpte_buf.header.n_valid = 1;
2461 hpte_buf.header.n_invalid = 0;
2462 hpte_buf.header.index = pte_index;
2463 hpte_buf.hpte[0] = pte0;
2464 hpte_buf.hpte[1] = pte1;
2466 * Write the hpte entry.
2467 * CAUTION: write() has the warn_unused_result attribute. Hence we
2468 * need to check the return value, even though we do nothing.
2470 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2471 goto out_close;
2474 out_close:
2475 close(htab_fd);
2476 return;
2478 error_out:
2479 return;
/* MSI route fixup hook: the route is left unchanged; always returns 0. */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data)
{
    (void)route;
    (void)address;
    (void)data;
    return 0;
}
/* Derive the GSI from an MSI data word: its low 16 bits. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t gsi_mask = 0xffff;

    return data & gsi_mask;
}
2493 int kvmppc_enable_hwrng(void)
2495 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2496 return -1;
2499 return kvmppc_enable_hcall(kvm_state, H_RANDOM);