/*
 * Source: qemu.git / target-ppc / kvm.c
 * (blob 70ca29637f947ac404385a8e00b1e57a859ba2f3; unrelated gitweb
 * navigation residue removed)
 */
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
41 #include "exec/gdbstub.h"
42 #include "exec/memattrs.h"
43 #include "sysemu/hostmem.h"
/* Uncomment to enable verbose KVM debug output on stderr */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Compiles to nothing, but still type-checks the arguments */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
55 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
57 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
58 KVM_CAP_LAST_INFO
/* Results of the one-time kernel capability probes done in kvm_arch_init().
 * Each cap_* flag gates use of an optional KVM interface later in this file. */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;          /* not probed here; set by kvmppc_set_papr() */
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Software-breakpoint opcode reported by the kernel (KVM_REG_PPC_DEBUG_INST) */
static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
91 static void kvm_kick_cpu(void *opaque)
93 PowerPCCPU *cpu = opaque;
95 qemu_cpu_kick(CPU(cpu));
static int kvm_ppc_register_host_cpu_type(void);

/*
 * Arch-specific KVM initialisation: probe every optional PPC capability
 * once and cache the results in the cap_* globals above, then register
 * the "host" CPU type.  Always succeeds (missing capabilities only
 * degrade functionality, except the level-irq warning below).
 */
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    /* VFIO-via-SPAPR-TCE acceleration is deliberately disabled here */
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
/*
 * Push the guest PVR into the kernel via KVM_SET_SREGS.
 * BookE vCPUs are skipped (the host PVR is used there); on BookS a
 * missing SEGSTATE capability is a hard error since we'd have no way
 * to set the PVR at all.  Returns 0 or a negative errno.
 */
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    /* Read-modify-write so unrelated sregs fields are preserved */
    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
/*
 * Registers QEMU's BookE 2.06 TLB array (env->tlb.tlbm) with the kernel
 * via KVM_CAP_SW_TLB so that QEMU and KVM operate on the same backing
 * store.  A missing capability is not an error (returns 0); on success
 * env->kvm_sw_tlb is set and the whole TLB is marked dirty for the
 * first sync.
 */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    /* The shared layout only works if both sides agree on entry count
     * and entry size. */
    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
#if defined(TARGET_PPC64)
/*
 * Synthesise an smmu_info structure for kernels that predate
 * KVM_PPC_GET_SMMU_INFO, distinguishing PR KVM from HV KVM by the
 * presence of KVM_CAP_PPC_GET_PVINFO.
 */
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteritics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows supports for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
/*
 * Fill *info with the host MMU capabilities: preferably from the
 * KVM_PPC_GET_SMMU_INFO ioctl, otherwise from the heuristic fallback
 * above.  Never fails.
 */
static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}
/*
 * Determine the page size backing mem_path: the filesystem block size
 * when the path lives on hugetlbfs, the normal host page size otherwise.
 * A failing statfs() (other than EINTR, which is retried) is fatal.
 */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int err;

    /* statfs() may be interrupted by a signal; retry on EINTR. */
    do {
        err = statfs(mem_path, &fs);
    } while (err != 0 && errno == EINTR);

    if (err != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type == HUGETLBFS_MAGIC) {
        /* It's hugepage, return the huge page size */
        return fs.f_bsize;
    }

    /* Explicit mempath, but it's ordinary pages */
    return getpagesize();
}
/*
 * object_child_foreach() callback: for each memory-backend object,
 * lower *(long *)opaque to the smallest backing page size seen.
 * A backend without a mem-path forces the plain host page size.
 * Always returns 0 so iteration continues over all children.
 */
static int find_max_supported_pagesize(Object *obj, void *opaque)
{
    char *mem_path;
    long *hpsize_min = opaque;

    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        /* NOTE(review): object_property_get_str() usually returns an
         * allocated string; mem_path looks like it is never freed here —
         * verify against the QOM property API. */
        mem_path = object_property_get_str(obj, "mem-path", NULL);
        if (mem_path) {
            long hpsize = gethugepagesize(mem_path);
            if (hpsize < *hpsize_min) {
                *hpsize_min = hpsize;
            }
        } else {
            *hpsize_min = getpagesize();
        }
    }

    return 0;
}
/*
 * Smallest page size that can back any guest RAM: the -mem-path
 * filesystem's page size if one was given, otherwise the minimum
 * across all memory-backend objects (normal page size if none exist).
 */
static long getrampagesize(void)
{
    long hpsize = LONG_MAX;
    Object *memdev_root;

    if (mem_path) {
        return gethugepagesize(mem_path);
    }

    /* it's possible we have memory-backend objects with
     * hugepage-backed RAM. these may get mapped into system
     * address space via -numa parameters or memory hotplug
     * hooks. we want to take these into account, but we
     * also want to make sure these supported hugepage
     * sizes are applicable across the entire range of memory
     * we may boot from, so we take the min across all
     * backends, and assume normal pages in cases where a
     * backend isn't backed by hugepages.
     */
    memdev_root = object_resolve_path("/objects", NULL);
    if (!memdev_root) {
        return getpagesize();
    }

    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);

    /* LONG_MAX means no backend updated the minimum */
    return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
}
/*
 * Whether a page of 2^shift bytes is usable: always true unless HV KVM
 * demands pages fit the backing store (KVM_PPC_PAGE_SIZES_REAL), in
 * which case the page must not exceed rampgsize.
 */
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    bool restricted = (flags & KVM_PPC_PAGE_SIZES_REAL) != 0;

    return !restricted || ((1ul << shift) <= rampgsize);
}
/*
 * Shrink the CPU's advertised segment/page-size table (env->sps) to the
 * intersection with what the host kernel and RAM backing actually
 * support, and clamp slb_nr / 1T-segment support to the host's values.
 * Only applies to 64-bit server (BookS) MMUs; SMMU info is queried from
 * the kernel once and cached in function-static storage.
 */
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        /* Copy only the actual-page-size encodings that fit the backing */
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

/* No server-MMU page size fixup needed on 32-bit targets */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */
466 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
468 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Hardware breakpoints/watchpoints currently armed, in insertion order */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

/* Scratch watchpoint reported back to the gdbstub on a debug exit */
static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
/*
 * Set the per-model hardware debug limits.  Only BookE gets non-zero
 * limits here; other models keep the zero defaults above.  The sanity
 * check guards against the limits outgrowing hw_debug_points[].
 */
static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}
/*
 * Per-vCPU KVM initialisation: fix up page sizes, sync sregs, create the
 * interrupt-reinjection timer, optionally share the BookE TLB with the
 * kernel, and fetch the kernel's software-breakpoint opcode.
 * Returns 0 or a negative errno.
 */
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    /* NOTE(review): idle_timer is a single file-scope pointer, so each
     * vCPU created overwrites the previous timer — confirm intended
     * behaviour for SMP guests. */
    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}
/*
 * Flush QEMU's copy of the shared software TLB back to KVM by marking
 * every entry dirty (all-ones bitmap) in a KVM_DIRTY_TLB call.
 * No-op unless the shared TLB was set up in kvm_booke206_tlb_init().
 * Failure is reported but not propagated.
 */
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    /* One bit per TLB entry, rounded up to whole bytes, all set */
    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
/*
 * Read one SPR from the kernel via KVM_GET_ONE_REG into env->spr[spr].
 * The register width is encoded in the id (KVM_REG_SIZE_*); unsupported
 * widths abort.  ioctl failure is only traced, not propagated.
 */
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}
/*
 * Write env->spr[spr] to the kernel via KVM_SET_ONE_REG, truncating or
 * widening to the register width encoded in id.  Mirror image of
 * kvm_get_one_spr(); failures are only traced.
 */
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}
/*
 * Push FP/VSX and Altivec state to the kernel: FPSCR, the 32 FPR/VSR
 * registers (two 64-bit halves each, host-endian ordered), then VSCR
 * and the 32 VRs when Altivec is present.
 * Returns 0, or the first ioctl error encountered.
 */
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            /* The FPR occupies the first doubleword of the VSR on BE
             * hosts and the second on LE hosts. */
#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
/*
 * Fetch FP/VSX and Altivec state from the kernel: inverse of
 * kvm_put_fp(), with the same host-endian doubleword layout for the
 * combined FPR/VSR registers.  Returns 0 or the first ioctl error.
 */
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                /* FPR half first on BE hosts, second on LE hosts */
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
#if defined(TARGET_PPC64)
/*
 * Fetch the three PAPR per-vCPU area registrations (VPA, SLB shadow,
 * dispatch trace log) from the kernel.  The asserts verify that each
 * addr/size pair is laid out contiguously in CPUPPCState, because KVM
 * reads/writes them as a single 128-bit register.
 * Returns 0 or the first ioctl error.
 */
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
/*
 * Push the PAPR per-vCPU area registrations to the kernel, honouring
 * KVM's ordering constraint (see comment below): the master VPA must
 * be registered before, and deregistered after, the SLB shadow and DTL.
 * Returns 0 or the first ioctl error.
 */
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    /* Deregister the master VPA last (see ordering comment above) */
    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
/*
 * Sync all QEMU-side CPU state into the kernel: GPRs/CR/special regs
 * via KVM_SET_REGS, FP/vector state, the shared software TLB, and —
 * when resetting or migrating (level >= KVM_PUT_RESET_STATE) — sregs,
 * HIOR, every ONE_REG-mapped SPR, transactional-memory state, the VPA
 * areas and the timebase offset.  Returns 0 or a negative errno.
 */
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    /* Read-modify-write so fields QEMU doesn't model are preserved */
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0;i < 32; i++)
        regs.gpr[i] = env->gpr[i];

    /* Pack the 8 per-field CR nibbles into one 32-bit register,
     * CR0 in the most significant nibble. */
    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0)
        return ret;

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            /* Valid entries carry their index in the low bits of slbe */
            if (env->slb[i].esid & SLB_ESID_V) {
                sregs.u.s.ppc64.slb[i].slbe |= i;
            }
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        /* Transactional-memory checkpoint state, only when a
         * transaction is active (MSR[TS] non-zero) */
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}
/* Recompute one BookE exception vector as IVOR offset + IVPR base. */
static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}
1012 int kvm_arch_get_registers(CPUState *cs)
1014 PowerPCCPU *cpu = POWERPC_CPU(cs);
1015 CPUPPCState *env = &cpu->env;
1016 struct kvm_regs regs;
1017 struct kvm_sregs sregs;
1018 uint32_t cr;
1019 int i, ret;
1021 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1022 if (ret < 0)
1023 return ret;
1025 cr = regs.cr;
1026 for (i = 7; i >= 0; i--) {
1027 env->crf[i] = cr & 15;
1028 cr >>= 4;
1031 env->ctr = regs.ctr;
1032 env->lr = regs.lr;
1033 cpu_write_xer(env, regs.xer);
1034 env->msr = regs.msr;
1035 env->nip = regs.pc;
1037 env->spr[SPR_SRR0] = regs.srr0;
1038 env->spr[SPR_SRR1] = regs.srr1;
1040 env->spr[SPR_SPRG0] = regs.sprg0;
1041 env->spr[SPR_SPRG1] = regs.sprg1;
1042 env->spr[SPR_SPRG2] = regs.sprg2;
1043 env->spr[SPR_SPRG3] = regs.sprg3;
1044 env->spr[SPR_SPRG4] = regs.sprg4;
1045 env->spr[SPR_SPRG5] = regs.sprg5;
1046 env->spr[SPR_SPRG6] = regs.sprg6;
1047 env->spr[SPR_SPRG7] = regs.sprg7;
1049 env->spr[SPR_BOOKE_PID] = regs.pid;
1051 for (i = 0;i < 32; i++)
1052 env->gpr[i] = regs.gpr[i];
1054 kvm_get_fp(cs);
1056 if (cap_booke_sregs) {
1057 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1058 if (ret < 0) {
1059 return ret;
1062 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1063 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1064 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1065 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1066 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1067 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1068 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1069 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1070 env->spr[SPR_DECR] = sregs.u.e.dec;
1071 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1072 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1073 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1076 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1077 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1078 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1079 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1080 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1081 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1084 if (sregs.u.e.features & KVM_SREGS_E_64) {
1085 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1088 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1089 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1092 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1093 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1094 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1095 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1096 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1097 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1098 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1099 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1100 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1101 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1102 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1103 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1104 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1105 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1106 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1107 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1108 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1109 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1110 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1111 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1112 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1113 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1114 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1115 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1116 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1117 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1118 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1119 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1120 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1121 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1122 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1123 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1124 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1126 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1127 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1128 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1129 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1130 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1131 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1132 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1135 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1136 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1137 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1140 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1141 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1142 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1143 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1144 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1148 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1149 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1150 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1151 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1152 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1153 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1154 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1155 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1156 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1157 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1158 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1161 if (sregs.u.e.features & KVM_SREGS_EXP) {
1162 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1165 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1166 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1167 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1170 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1171 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1172 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1173 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1175 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1176 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1177 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1182 if (cap_segstate) {
1183 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1184 if (ret < 0) {
1185 return ret;
1188 if (!env->external_htab) {
1189 ppc_store_sdr1(env, sregs.u.s.sdr1);
1192 /* Sync SLB */
1193 #ifdef TARGET_PPC64
1195 * The packed SLB array we get from KVM_GET_SREGS only contains
1196 * information about valid entries. So we flush our internal
1197 * copy to get rid of stale ones, then put all valid SLB entries
1198 * back in.
1200 memset(env->slb, 0, sizeof(env->slb));
1201 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1202 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1203 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1205 * Only restore valid entries
1207 if (rb & SLB_ESID_V) {
1208 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1211 #endif
1213 /* Sync SRs */
1214 for (i = 0; i < 16; i++) {
1215 env->sr[i] = sregs.u.s.ppc32.sr[i];
1218 /* Sync BATs */
1219 for (i = 0; i < 8; i++) {
1220 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1221 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1222 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1223 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1227 if (cap_hior) {
1228 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1231 if (cap_one_reg) {
1232 int i;
1234 /* We deliberately ignore errors here, for kernels which have
1235 * the ONE_REG calls, but don't support the specific
1236 * registers, there's a reasonable chance things will still
1237 * work, at least until we try to migrate. */
1238 for (i = 0; i < 1024; i++) {
1239 uint64_t id = env->spr_cb[i].one_reg_id;
1241 if (id != 0) {
1242 kvm_get_one_spr(cs, id, i);
1246 #ifdef TARGET_PPC64
1247 if (msr_ts) {
1248 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1249 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1251 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1252 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1254 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1255 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1256 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1257 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1258 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1259 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1260 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1261 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1262 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1263 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1266 if (cap_papr) {
1267 if (kvm_get_vpa(cs) < 0) {
1268 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1272 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1273 #endif
1276 return 0;
1279 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1281 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1283 if (irq != PPC_INTERRUPT_EXT) {
1284 return 0;
1287 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1288 return 0;
1291 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1293 return 0;
1296 #if defined(TARGET_PPCEMB)
1297 #define PPC_INPUT_INT PPC40x_INPUT_INT
1298 #elif defined(TARGET_PPC64)
1299 #define PPC_INPUT_INT PPC970_INPUT_INT
1300 #else
1301 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1302 #endif
1304 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1306 PowerPCCPU *cpu = POWERPC_CPU(cs);
1307 CPUPPCState *env = &cpu->env;
1308 int r;
1309 unsigned irq;
1311 qemu_mutex_lock_iothread();
1313 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1314 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1315 if (!cap_interrupt_level &&
1316 run->ready_for_interrupt_injection &&
1317 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1318 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1320 /* For now KVM disregards the 'irq' argument. However, in the
1321 * future KVM could cache it in-kernel to avoid a heavyweight exit
1322 * when reading the UIC.
1324 irq = KVM_INTERRUPT_SET;
1326 DPRINTF("injected interrupt %d\n", irq);
1327 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1328 if (r < 0) {
1329 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1332 /* Always wake up soon in case the interrupt was level based */
1333 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1334 (get_ticks_per_sec() / 50));
1337 /* We don't know if there are more interrupts pending after this. However,
1338 * the guest will return to userspace in the course of handling this one
1339 * anyways, so we will get a chance to deliver the rest. */
1341 qemu_mutex_unlock_iothread();
1344 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1346 return MEMTXATTRS_UNSPECIFIED;
1349 int kvm_arch_process_async_events(CPUState *cs)
1351 return cs->halted;
1354 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1356 CPUState *cs = CPU(cpu);
1357 CPUPPCState *env = &cpu->env;
1359 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1360 cs->halted = 1;
1361 cs->exception_index = EXCP_HLT;
1364 return 0;
1367 /* map dcr access to existing qemu dcr emulation */
1368 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1370 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1371 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1373 return 0;
1376 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1378 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1379 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1381 return 0;
1384 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1386 /* Mixed endian case is not handled */
1387 uint32_t sc = debug_inst_opcode;
1389 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1390 sizeof(sc), 0) ||
1391 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1392 return -EINVAL;
1395 return 0;
1398 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1400 uint32_t sc;
1402 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1403 sc != debug_inst_opcode ||
1404 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1405 sizeof(sc), 1)) {
1406 return -EINVAL;
1409 return 0;
1412 static int find_hw_breakpoint(target_ulong addr, int type)
1414 int n;
1416 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1417 <= ARRAY_SIZE(hw_debug_points));
1419 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1420 if (hw_debug_points[n].addr == addr &&
1421 hw_debug_points[n].type == type) {
1422 return n;
1426 return -1;
1429 static int find_hw_watchpoint(target_ulong addr, int *flag)
1431 int n;
1433 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1434 if (n >= 0) {
1435 *flag = BP_MEM_ACCESS;
1436 return n;
1439 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1440 if (n >= 0) {
1441 *flag = BP_MEM_WRITE;
1442 return n;
1445 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1446 if (n >= 0) {
1447 *flag = BP_MEM_READ;
1448 return n;
1451 return -1;
1454 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1455 target_ulong len, int type)
1457 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1458 return -ENOBUFS;
1461 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1462 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1464 switch (type) {
1465 case GDB_BREAKPOINT_HW:
1466 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1467 return -ENOBUFS;
1470 if (find_hw_breakpoint(addr, type) >= 0) {
1471 return -EEXIST;
1474 nb_hw_breakpoint++;
1475 break;
1477 case GDB_WATCHPOINT_WRITE:
1478 case GDB_WATCHPOINT_READ:
1479 case GDB_WATCHPOINT_ACCESS:
1480 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1481 return -ENOBUFS;
1484 if (find_hw_breakpoint(addr, type) >= 0) {
1485 return -EEXIST;
1488 nb_hw_watchpoint++;
1489 break;
1491 default:
1492 return -ENOSYS;
1495 return 0;
1498 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1499 target_ulong len, int type)
1501 int n;
1503 n = find_hw_breakpoint(addr, type);
1504 if (n < 0) {
1505 return -ENOENT;
1508 switch (type) {
1509 case GDB_BREAKPOINT_HW:
1510 nb_hw_breakpoint--;
1511 break;
1513 case GDB_WATCHPOINT_WRITE:
1514 case GDB_WATCHPOINT_READ:
1515 case GDB_WATCHPOINT_ACCESS:
1516 nb_hw_watchpoint--;
1517 break;
1519 default:
1520 return -ENOSYS;
1522 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1524 return 0;
1527 void kvm_arch_remove_all_hw_breakpoints(void)
1529 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1532 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1534 int n;
1536 /* Software Breakpoint updates */
1537 if (kvm_sw_breakpoints_active(cs)) {
1538 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1541 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1542 <= ARRAY_SIZE(hw_debug_points));
1543 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1545 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1546 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1547 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1548 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1549 switch (hw_debug_points[n].type) {
1550 case GDB_BREAKPOINT_HW:
1551 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1552 break;
1553 case GDB_WATCHPOINT_WRITE:
1554 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1555 break;
1556 case GDB_WATCHPOINT_READ:
1557 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1558 break;
1559 case GDB_WATCHPOINT_ACCESS:
1560 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1561 KVMPPC_DEBUG_WATCH_READ;
1562 break;
1563 default:
1564 cpu_abort(cs, "Unsupported breakpoint type\n");
1566 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1571 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1573 CPUState *cs = CPU(cpu);
1574 CPUPPCState *env = &cpu->env;
1575 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1576 int handle = 0;
1577 int n;
1578 int flag = 0;
1580 if (cs->singlestep_enabled) {
1581 handle = 1;
1582 } else if (arch_info->status) {
1583 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1584 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1585 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1586 if (n >= 0) {
1587 handle = 1;
1589 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1590 KVMPPC_DEBUG_WATCH_WRITE)) {
1591 n = find_hw_watchpoint(arch_info->address, &flag);
1592 if (n >= 0) {
1593 handle = 1;
1594 cs->watchpoint_hit = &hw_watchpoint;
1595 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1596 hw_watchpoint.flags = flag;
1600 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1601 handle = 1;
1602 } else {
1603 /* QEMU is not able to handle debug exception, so inject
1604 * program exception to guest;
1605 * Yes program exception NOT debug exception !!
1606 * When QEMU is using debug resources then debug exception must
1607 * be always set. To achieve this we set MSR_DE and also set
1608 * MSRP_DEP so guest cannot change MSR_DE.
1609 * When emulating debug resource for guest we want guest
1610 * to control MSR_DE (enable/disable debug interrupt on need).
1611 * Supporting both configurations are NOT possible.
1612 * So the result is that we cannot share debug resources
1613 * between QEMU and Guest on BOOKE architecture.
1614 * In the current design QEMU gets the priority over guest,
1615 * this means that if QEMU is using debug resources then guest
1616 * cannot use them;
1617 * For software breakpoint QEMU uses a privileged instruction;
1618 * So there cannot be any reason that we are here for guest
1619 * set debug exception, only possibility is guest executed a
1620 * privileged / illegal instruction and that's why we are
1621 * injecting a program interrupt.
1624 cpu_synchronize_state(cs);
1625 /* env->nip is PC, so increment this by 4 to use
1626 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1628 env->nip += 4;
1629 cs->exception_index = POWERPC_EXCP_PROGRAM;
1630 env->error_code = POWERPC_EXCP_INVAL;
1631 ppc_cpu_do_interrupt(cs);
1634 return handle;
1637 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1639 PowerPCCPU *cpu = POWERPC_CPU(cs);
1640 CPUPPCState *env = &cpu->env;
1641 int ret;
1643 qemu_mutex_lock_iothread();
1645 switch (run->exit_reason) {
1646 case KVM_EXIT_DCR:
1647 if (run->dcr.is_write) {
1648 DPRINTF("handle dcr write\n");
1649 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1650 } else {
1651 DPRINTF("handle dcr read\n");
1652 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1654 break;
1655 case KVM_EXIT_HLT:
1656 DPRINTF("handle halt\n");
1657 ret = kvmppc_handle_halt(cpu);
1658 break;
1659 #if defined(TARGET_PPC64)
1660 case KVM_EXIT_PAPR_HCALL:
1661 DPRINTF("handle PAPR hypercall\n");
1662 run->papr_hcall.ret = spapr_hypercall(cpu,
1663 run->papr_hcall.nr,
1664 run->papr_hcall.args);
1665 ret = 0;
1666 break;
1667 #endif
1668 case KVM_EXIT_EPR:
1669 DPRINTF("handle epr\n");
1670 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1671 ret = 0;
1672 break;
1673 case KVM_EXIT_WATCHDOG:
1674 DPRINTF("handle watchdog expiry\n");
1675 watchdog_perform_action();
1676 ret = 0;
1677 break;
1679 case KVM_EXIT_DEBUG:
1680 DPRINTF("handle debug exception\n");
1681 if (kvm_handle_debug(cpu, run)) {
1682 ret = EXCP_DEBUG;
1683 break;
1685 /* re-enter, this exception was guest-internal */
1686 ret = 0;
1687 break;
1689 default:
1690 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1691 ret = -1;
1692 break;
1695 qemu_mutex_unlock_iothread();
1696 return ret;
1699 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1701 CPUState *cs = CPU(cpu);
1702 uint32_t bits = tsr_bits;
1703 struct kvm_one_reg reg = {
1704 .id = KVM_REG_PPC_OR_TSR,
1705 .addr = (uintptr_t) &bits,
1708 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1711 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1714 CPUState *cs = CPU(cpu);
1715 uint32_t bits = tsr_bits;
1716 struct kvm_one_reg reg = {
1717 .id = KVM_REG_PPC_CLEAR_TSR,
1718 .addr = (uintptr_t) &bits,
1721 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1724 int kvmppc_set_tcr(PowerPCCPU *cpu)
1726 CPUState *cs = CPU(cpu);
1727 CPUPPCState *env = &cpu->env;
1728 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1730 struct kvm_one_reg reg = {
1731 .id = KVM_REG_PPC_TCR,
1732 .addr = (uintptr_t) &tcr,
1735 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1738 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1740 CPUState *cs = CPU(cpu);
1741 int ret;
1743 if (!kvm_enabled()) {
1744 return -1;
1747 if (!cap_ppc_watchdog) {
1748 printf("warning: KVM does not support watchdog");
1749 return -1;
1752 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1753 if (ret < 0) {
1754 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1755 __func__, strerror(-ret));
1756 return ret;
1759 return ret;
/* Scan /proc/cpuinfo for a line starting with @field and copy it
 * (including the field name) into @value.  Returns 0 on success, -1 if
 * the file can't be opened or the field isn't present. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

/* Parse the host timebase frequency out of /proc/cpuinfo's "timebase"
 * line; fall back to the emulated tick rate when it can't be found. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    return atoi(ns);
}
1809 bool kvmppc_get_host_serial(char **value)
1811 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1812 NULL);
1815 bool kvmppc_get_host_model(char **value)
1817 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1820 /* Try to find a device tree node for a CPU with clock-frequency property */
1821 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1823 struct dirent *dirp;
1824 DIR *dp;
1826 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1827 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1828 return -1;
1831 buf[0] = '\0';
1832 while ((dirp = readdir(dp)) != NULL) {
1833 FILE *f;
1834 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1835 dirp->d_name);
1836 f = fopen(buf, "r");
1837 if (f) {
1838 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1839 fclose(f);
1840 break;
1842 buf[0] = '\0';
1844 closedir(dp);
1845 if (buf[0] == '\0') {
1846 printf("Unknown host!\n");
1847 return -1;
1850 return 0;
1853 static uint64_t kvmppc_read_int_dt(const char *filename)
1855 union {
1856 uint32_t v32;
1857 uint64_t v64;
1858 } u;
1859 FILE *f;
1860 int len;
1862 f = fopen(filename, "rb");
1863 if (!f) {
1864 return -1;
1867 len = fread(&u, 1, sizeof(u), f);
1868 fclose(f);
1869 switch (len) {
1870 case 4:
1871 /* property is a 32-bit quantity */
1872 return be32_to_cpu(u.v32);
1873 case 8:
1874 return be64_to_cpu(u.v64);
1877 return 0;
1880 /* Read a CPU node property from the host device tree that's a single
1881 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1882 * (can't find or open the property, or doesn't understand the
1883 * format) */
1884 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1886 char buf[PATH_MAX], *tmp;
1887 uint64_t val;
1889 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1890 return -1;
1893 tmp = g_strdup_printf("%s/%s", buf, propname);
1894 val = kvmppc_read_int_dt(tmp);
1895 g_free(tmp);
1897 return val;
1900 uint64_t kvmppc_get_clockfreq(void)
1902 return kvmppc_read_int_cpu_dt("clock-frequency");
1905 uint32_t kvmppc_get_vmx(void)
1907 return kvmppc_read_int_cpu_dt("ibm,vmx");
1910 uint32_t kvmppc_get_dfp(void)
1912 return kvmppc_read_int_cpu_dt("ibm,dfp");
1915 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1917 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1918 CPUState *cs = CPU(cpu);
1920 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1921 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1922 return 0;
1925 return 1;
1928 int kvmppc_get_hasidle(CPUPPCState *env)
1930 struct kvm_ppc_pvinfo pvinfo;
1932 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1933 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1934 return 1;
1937 return 0;
1940 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1942 uint32_t *hc = (uint32_t*)buf;
1943 struct kvm_ppc_pvinfo pvinfo;
1945 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1946 memcpy(buf, pvinfo.hcall, buf_len);
1947 return 0;
1951 * Fallback to always fail hypercalls regardless of endianness:
1953 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1954 * li r3, -1
1955 * b .+8 (becomes nop in wrong endian)
1956 * bswap32(li r3, -1)
1959 hc[0] = cpu_to_be32(0x08000048);
1960 hc[1] = cpu_to_be32(0x3860ffff);
1961 hc[2] = cpu_to_be32(0x48000008);
1962 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1964 return 0;
1967 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1969 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1972 void kvmppc_enable_logical_ci_hcalls(void)
1975 * FIXME: it would be nice if we could detect the cases where
1976 * we're using a device which requires the in kernel
1977 * implementation of these hcalls, but the kernel lacks them and
1978 * produce a warning.
1980 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1981 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1984 void kvmppc_enable_set_mode_hcall(void)
1986 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1989 void kvmppc_set_papr(PowerPCCPU *cpu)
1991 CPUState *cs = CPU(cpu);
1992 int ret;
1994 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1995 if (ret) {
1996 cpu_abort(cs, "This KVM version does not support PAPR\n");
1999 /* Update the capability flag so we sync the right information
2000 * with kvm */
2001 cap_papr = 1;
2004 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2006 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2009 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2011 CPUState *cs = CPU(cpu);
2012 int ret;
2014 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2015 if (ret && mpic_proxy) {
2016 cpu_abort(cs, "This KVM version does not support EPR\n");
2020 int kvmppc_smt_threads(void)
2022 return cap_ppc_smt ? cap_ppc_smt : 1;
2025 #ifdef TARGET_PPC64
2026 off_t kvmppc_alloc_rma(void **rma)
2028 off_t size;
2029 int fd;
2030 struct kvm_allocate_rma ret;
2032 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2033 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2034 * not necessary on this hardware
2035 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2037 * FIXME: We should allow the user to force contiguous RMA
2038 * allocation in the cap_ppc_rma==1 case.
2040 if (cap_ppc_rma < 2) {
2041 return 0;
2044 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2045 if (fd < 0) {
2046 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2047 strerror(errno));
2048 return -1;
2051 size = MIN(ret.rma_size, 256ul << 20);
2053 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2054 if (*rma == MAP_FAILED) {
2055 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2056 return -1;
2059 return size;
2062 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2064 struct kvm_ppc_smmu_info info;
2065 long rampagesize, best_page_shift;
2066 int i;
2068 if (cap_ppc_rma >= 2) {
2069 return current_size;
2072 /* Find the largest hardware supported page size that's less than
2073 * or equal to the (logical) backing page size of guest RAM */
2074 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2075 rampagesize = getrampagesize();
2076 best_page_shift = 0;
2078 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2079 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2081 if (!sps->page_shift) {
2082 continue;
2085 if ((sps->page_shift > best_page_shift)
2086 && ((1UL << sps->page_shift) <= rampagesize)) {
2087 best_page_shift = sps->page_shift;
2091 return MIN(current_size,
2092 1ULL << (best_page_shift + hash_shift - 7));
2094 #endif
2096 bool kvmppc_spapr_use_multitce(void)
2098 return cap_spapr_multitce;
2101 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2102 bool need_vfio)
2104 struct kvm_create_spapr_tce args = {
2105 .liobn = liobn,
2106 .window_size = window_size,
2108 long len;
2109 int fd;
2110 void *table;
2112 /* Must set fd to -1 so we don't try to munmap when called for
2113 * destroying the table, which the upper layers -will- do
2115 *pfd = -1;
2116 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2117 return NULL;
2120 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2121 if (fd < 0) {
2122 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2123 liobn);
2124 return NULL;
2127 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2128 /* FIXME: round this up to page size */
2130 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2131 if (table == MAP_FAILED) {
2132 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2133 liobn);
2134 close(fd);
2135 return NULL;
2138 *pfd = fd;
2139 return table;
2142 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2144 long len;
2146 if (fd < 0) {
2147 return -1;
2150 len = nb_table * sizeof(uint64_t);
2151 if ((munmap(table, len) < 0) ||
2152 (close(fd) < 0)) {
2153 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2154 strerror(errno));
2155 /* Leak the table */
2158 return 0;
2161 int kvmppc_reset_htab(int shift_hint)
2163 uint32_t shift = shift_hint;
2165 if (!kvm_enabled()) {
2166 /* Full emulation, tell caller to allocate htab itself */
2167 return 0;
2169 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2170 int ret;
2171 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2172 if (ret == -ENOTTY) {
2173 /* At least some versions of PR KVM advertise the
2174 * capability, but don't implement the ioctl(). Oops.
2175 * Return 0 so that we allocate the htab in qemu, as is
2176 * correct for PR. */
2177 return 0;
2178 } else if (ret < 0) {
2179 return ret;
2181 return shift;
2184 /* We have a kernel that predates the htab reset calls. For PR
2185 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2186 * this era, it has allocated a 16MB fixed size hash table
2187 * already. Kernels of this era have the GET_PVINFO capability
2188 * only on PR, so we use this hack to determine the right
2189 * answer */
2190 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2191 /* PR - tell caller to allocate htab */
2192 return 0;
2193 } else {
2194 /* HV - assume 16MB kernel allocated htab */
2195 return 24;
2199 static inline uint32_t mfpvr(void)
2201 uint32_t pvr;
2203 asm ("mfpvr %0"
2204 : "=r"(pvr));
2205 return pvr;
/* Set (@on true) or clear (@on false) the bits in @flags within *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
2217 static void kvmppc_host_cpu_initfn(Object *obj)
2219 assert(kvm_enabled());
2222 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2224 DeviceClass *dc = DEVICE_CLASS(oc);
2225 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2226 uint32_t vmx = kvmppc_get_vmx();
2227 uint32_t dfp = kvmppc_get_dfp();
2228 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2229 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2231 /* Now fix up the class with information we can query from the host */
2232 pcc->pvr = mfpvr();
2234 if (vmx != -1) {
2235 /* Only override when we know what the host supports */
2236 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2237 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2239 if (dfp != -1) {
2240 /* Only override when we know what the host supports */
2241 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2244 if (dcache_size != -1) {
2245 pcc->l1_dcache_size = dcache_size;
2248 if (icache_size != -1) {
2249 pcc->l1_icache_size = icache_size;
2252 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2253 dc->cannot_destroy_with_object_finalize_yet = true;
2256 bool kvmppc_has_cap_epr(void)
2258 return cap_epr;
2261 bool kvmppc_has_cap_htab_fd(void)
2263 return cap_htab_fd;
2266 bool kvmppc_has_cap_fixup_hcalls(void)
2268 return cap_fixup_hcalls;
2271 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2273 ObjectClass *oc = OBJECT_CLASS(pcc);
2275 while (oc && !object_class_is_abstract(oc)) {
2276 oc = object_class_get_parent(oc);
2278 assert(oc);
2280 return POWERPC_CPU_CLASS(oc);
2283 static int kvm_ppc_register_host_cpu_type(void)
2285 TypeInfo type_info = {
2286 .name = TYPE_HOST_POWERPC_CPU,
2287 .instance_init = kvmppc_host_cpu_initfn,
2288 .class_init = kvmppc_host_cpu_class_init,
2290 uint32_t host_pvr = mfpvr();
2291 PowerPCCPUClass *pvr_pcc;
2292 DeviceClass *dc;
2294 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2295 if (pvr_pcc == NULL) {
2296 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2298 if (pvr_pcc == NULL) {
2299 return -1;
2301 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2302 type_register(&type_info);
2304 /* Register generic family CPU class for a family */
2305 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2306 dc = DEVICE_CLASS(pvr_pcc);
2307 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2308 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2309 type_register(&type_info);
2311 return 0;
2314 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2316 struct kvm_rtas_token_args args = {
2317 .token = token,
2320 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2321 return -ENOENT;
2324 strncpy(args.name, function, sizeof(args.name));
2326 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2329 int kvmppc_get_htab_fd(bool write)
2331 struct kvm_get_htab_fd s = {
2332 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2333 .start_index = 0,
2336 if (!cap_htab_fd) {
2337 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2338 return -1;
2341 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2344 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2346 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2347 uint8_t buf[bufsize];
2348 ssize_t rc;
2350 do {
2351 rc = read(fd, buf, bufsize);
2352 if (rc < 0) {
2353 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2354 strerror(errno));
2355 return rc;
2356 } else if (rc) {
2357 uint8_t *buffer = buf;
2358 ssize_t n = rc;
2359 while (n) {
2360 struct kvm_get_htab_header *head =
2361 (struct kvm_get_htab_header *) buffer;
2362 size_t chunksize = sizeof(*head) +
2363 HASH_PTE_SIZE_64 * head->n_valid;
2365 qemu_put_be32(f, head->index);
2366 qemu_put_be16(f, head->n_valid);
2367 qemu_put_be16(f, head->n_invalid);
2368 qemu_put_buffer(f, (void *)(head + 1),
2369 HASH_PTE_SIZE_64 * head->n_valid);
2371 buffer += chunksize;
2372 n -= chunksize;
2375 } while ((rc != 0)
2376 && ((max_ns < 0)
2377 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2379 return (rc == 0) ? 1 : 0;
2382 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2383 uint16_t n_valid, uint16_t n_invalid)
2385 struct kvm_get_htab_header *buf;
2386 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2387 ssize_t rc;
2389 buf = alloca(chunksize);
2390 buf->index = index;
2391 buf->n_valid = n_valid;
2392 buf->n_invalid = n_invalid;
2394 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2396 rc = write(fd, buf, chunksize);
2397 if (rc < 0) {
2398 fprintf(stderr, "Error writing KVM hash table: %s\n",
2399 strerror(errno));
2400 return rc;
2402 if (rc != chunksize) {
2403 /* We should never get a short write on a single chunk */
2404 fprintf(stderr, "Short write, restoring KVM hash table\n");
2405 return -1;
2407 return 0;
2410 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2412 return true;
2415 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2417 return 1;
2420 int kvm_arch_on_sigbus(int code, void *addr)
2422 return 1;
2425 void kvm_arch_init_irq_routing(KVMState *s)
2429 struct kvm_get_htab_buf {
2430 struct kvm_get_htab_header header;
2432 * We require one extra byte for read
2434 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2437 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2439 int htab_fd;
2440 struct kvm_get_htab_fd ghf;
2441 struct kvm_get_htab_buf *hpte_buf;
2443 ghf.flags = 0;
2444 ghf.start_index = pte_index;
2445 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2446 if (htab_fd < 0) {
2447 goto error_out;
2450 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2452 * Read the hpte group
2454 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2455 goto out_close;
2458 close(htab_fd);
2459 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2461 out_close:
2462 g_free(hpte_buf);
2463 close(htab_fd);
2464 error_out:
2465 return 0;
2468 void kvmppc_hash64_free_pteg(uint64_t token)
2470 struct kvm_get_htab_buf *htab_buf;
2472 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2473 hpte);
2474 g_free(htab_buf);
2475 return;
2478 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2479 target_ulong pte0, target_ulong pte1)
2481 int htab_fd;
2482 struct kvm_get_htab_fd ghf;
2483 struct kvm_get_htab_buf hpte_buf;
2485 ghf.flags = 0;
2486 ghf.start_index = 0; /* Ignored */
2487 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2488 if (htab_fd < 0) {
2489 goto error_out;
2492 hpte_buf.header.n_valid = 1;
2493 hpte_buf.header.n_invalid = 0;
2494 hpte_buf.header.index = pte_index;
2495 hpte_buf.hpte[0] = pte0;
2496 hpte_buf.hpte[1] = pte1;
2498 * Write the hpte entry.
2499 * CAUTION: write() has the warn_unused_result attribute. Hence we
2500 * need to check the return value, even though we do nothing.
2502 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2503 goto out_close;
2506 out_close:
2507 close(htab_fd);
2508 return;
2510 error_out:
2511 return;
2514 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2515 uint64_t address, uint32_t data, PCIDevice *dev)
2517 return 0;
/* MSI data maps directly to a GSI; only the low 16 bits are meaningful. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2525 int kvmppc_enable_hwrng(void)
2527 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2528 return -1;
2531 return kvmppc_enable_hcall(kvm_state, H_RANDOM);