target-ppc/kvm.c
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include <dirent.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "cpu.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "sysemu/hostmem.h"

//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

static uint32_t debug_inst_opcode;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that
 *     there's still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}

static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
130 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
132 CPUPPCState *cenv = &cpu->env;
133 CPUState *cs = CPU(cpu);
134 struct kvm_sregs sregs;
135 int ret;
137 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
138 /* What we're really trying to say is "if we're on BookE, we use
139 the native PVR for now". This is the only sane way to check
140 it though, so we potentially confuse users that they can run
141 BookE guests on BookS. Let's hope nobody dares enough :) */
142 return 0;
143 } else {
144 if (!cap_segstate) {
145 fprintf(stderr, "kvm error: missing PVR setting capability\n");
146 return -ENOSYS;
150 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
151 if (ret) {
152 return ret;
155 sregs.pvr = cenv->spr[SPR_PVR];
156 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
159 /* Set up a shared TLB array with KVM */
160 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
162 CPUPPCState *env = &cpu->env;
163 CPUState *cs = CPU(cpu);
164 struct kvm_book3e_206_tlb_params params = {};
165 struct kvm_config_tlb cfg = {};
166 unsigned int entries = 0;
167 int ret, i;
169 if (!kvm_enabled() ||
170 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
171 return 0;
174 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
176 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
177 params.tlb_sizes[i] = booke206_tlb_size(env, i);
178 params.tlb_ways[i] = booke206_tlb_ways(env, i);
179 entries += params.tlb_sizes[i];
182 assert(entries == env->nb_tlb);
183 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
185 env->tlb_dirty = true;
187 cfg.array = (uintptr_t)env->tlb.tlbm;
188 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
189 cfg.params = (uintptr_t)&params;
190 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
192 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
193 if (ret < 0) {
194 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
195 __func__, strerror(-ret));
196 return ret;
199 env->kvm_sw_tlb = true;
200 return 0;
204 #if defined(TARGET_PPC64)
205 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
206 struct kvm_ppc_smmu_info *info)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
211 memset(info, 0, sizeof(*info));
    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
236 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
237 /* No flags */
238 info->flags = 0;
239 info->slb_size = 64;
241 /* Standard 4k base page size segment */
242 info->sps[0].page_shift = 12;
243 info->sps[0].slb_enc = 0;
244 info->sps[0].enc[0].page_shift = 12;
245 info->sps[0].enc[0].pte_enc = 0;
247 /* Standard 16M large page size segment */
248 info->sps[1].page_shift = 24;
249 info->sps[1].slb_enc = SLB_VSID_L;
250 info->sps[1].enc[0].page_shift = 24;
251 info->sps[1].enc[0].pte_enc = 0;
252 } else {
253 int i = 0;
255 /* HV KVM has backing store size restrictions */
256 info->flags = KVM_PPC_PAGE_SIZES_REAL;
258 if (env->mmu_model & POWERPC_MMU_1TSEG) {
259 info->flags |= KVM_PPC_1T_SEGMENTS;
262 if (env->mmu_model == POWERPC_MMU_2_06) {
263 info->slb_size = 32;
264 } else {
265 info->slb_size = 64;
268 /* Standard 4k base page size segment */
269 info->sps[i].page_shift = 12;
270 info->sps[i].slb_enc = 0;
271 info->sps[i].enc[0].page_shift = 12;
272 info->sps[i].enc[0].pte_enc = 0;
273 i++;
275 /* 64K on MMU 2.06 */
276 if (env->mmu_model == POWERPC_MMU_2_06) {
277 info->sps[i].page_shift = 16;
278 info->sps[i].slb_enc = 0x110;
279 info->sps[i].enc[0].page_shift = 16;
280 info->sps[i].enc[0].pte_enc = 1;
281 i++;
284 /* Standard 16M large page size segment */
285 info->sps[i].page_shift = 24;
286 info->sps[i].slb_enc = SLB_VSID_L;
287 info->sps[i].enc[0].page_shift = 24;
288 info->sps[i].enc[0].pte_enc = 0;
292 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
294 CPUState *cs = CPU(cpu);
295 int ret;
297 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
298 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
299 if (ret == 0) {
300 return;
304 kvm_get_fallback_smmu_info(cpu, info);
307 static long gethugepagesize(const char *mem_path)
309 struct statfs fs;
310 int ret;
312 do {
313 ret = statfs(mem_path, &fs);
314 } while (ret != 0 && errno == EINTR);
316 if (ret != 0) {
317 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
318 strerror(errno));
319 exit(1);
322 #define HUGETLBFS_MAGIC 0x958458f6
324 if (fs.f_type != HUGETLBFS_MAGIC) {
325 /* Explicit mempath, but it's ordinary pages */
326 return getpagesize();
329 /* It's hugepage, return the huge page size */
330 return fs.f_bsize;
333 static int find_max_supported_pagesize(Object *obj, void *opaque)
335 char *mem_path;
336 long *hpsize_min = opaque;
338 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
339 mem_path = object_property_get_str(obj, "mem-path", NULL);
340 if (mem_path) {
341 long hpsize = gethugepagesize(mem_path);
342 if (hpsize < *hpsize_min) {
343 *hpsize_min = hpsize;
345 } else {
346 *hpsize_min = getpagesize();
350 return 0;
353 static long getrampagesize(void)
355 long hpsize = LONG_MAX;
356 Object *memdev_root;
358 if (mem_path) {
359 return gethugepagesize(mem_path);
362 /* it's possible we have memory-backend objects with
363 * hugepage-backed RAM. these may get mapped into system
364 * address space via -numa parameters or memory hotplug
365 * hooks. we want to take these into account, but we
366 * also want to make sure these supported hugepage
367 * sizes are applicable across the entire range of memory
368 * we may boot from, so we take the min across all
369 * backends, and assume normal pages in cases where a
370 * backend isn't backed by hugepages.
372 memdev_root = object_resolve_path("/objects", NULL);
373 if (!memdev_root) {
374 return getpagesize();
377 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
379 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
382 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
384 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
385 return true;
388 return (1ul << shift) <= rampgsize;
391 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
393 static struct kvm_ppc_smmu_info smmu_info;
394 static bool has_smmu_info;
395 CPUPPCState *env = &cpu->env;
396 long rampagesize;
397 int iq, ik, jq, jk;
399 /* We only handle page sizes for 64-bit server guests for now */
400 if (!(env->mmu_model & POWERPC_MMU_64)) {
401 return;
404 /* Collect MMU info from kernel if not already */
405 if (!has_smmu_info) {
406 kvm_get_smmu_info(cpu, &smmu_info);
407 has_smmu_info = true;
410 rampagesize = getrampagesize();
412 /* Convert to QEMU form */
413 memset(&env->sps, 0, sizeof(env->sps));
416 * XXX This loop should be an entry wide AND of the capabilities that
417 * the selected CPU has with the capabilities that KVM supports.
419 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
420 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
421 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
423 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
424 ksps->page_shift)) {
425 continue;
427 qsps->page_shift = ksps->page_shift;
428 qsps->slb_enc = ksps->slb_enc;
429 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
430 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
431 ksps->enc[jk].page_shift)) {
432 continue;
434 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
435 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
436 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
437 break;
440 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
441 break;
444 env->slb_nr = smmu_info.slb_size;
445 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
446 env->mmu_model &= ~POWERPC_MMU_1TSEG;
449 #else /* defined (TARGET_PPC64) */
451 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
455 #endif /* !defined (TARGET_PPC64) */
457 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
459 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4
468 static struct HWBreakpoint {
469 target_ulong addr;
470 int type;
471 } hw_debug_points[MAX_HW_BKPTS];
473 static CPUWatchpoint hw_watchpoint;
/* By default there are no hardware breakpoints or watchpoints supported */
476 static int max_hw_breakpoint;
477 static int max_hw_watchpoint;
478 static int nb_hw_breakpoint;
479 static int nb_hw_watchpoint;
481 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
483 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
484 max_hw_breakpoint = 2;
485 max_hw_watchpoint = 2;
488 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
489 fprintf(stderr, "Error initializing h/w breakpoints\n");
490 return;
494 int kvm_arch_init_vcpu(CPUState *cs)
496 PowerPCCPU *cpu = POWERPC_CPU(cs);
497 CPUPPCState *cenv = &cpu->env;
498 int ret;
500 /* Gather server mmu info from KVM and update the CPU state */
501 kvm_fixup_page_sizes(cpu);
503 /* Synchronize sregs with kvm */
504 ret = kvm_arch_sync_sregs(cpu);
505 if (ret) {
506 return ret;
509 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
511 /* Some targets support access to KVM's guest TLB. */
512 switch (cenv->mmu_model) {
513 case POWERPC_MMU_BOOKE206:
514 ret = kvm_booke206_tlb_init(cpu);
515 break;
516 default:
517 break;
520 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
521 kvmppc_hw_debug_points_init(cenv);
523 return ret;
526 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
528 CPUPPCState *env = &cpu->env;
529 CPUState *cs = CPU(cpu);
530 struct kvm_dirty_tlb dirty_tlb;
531 unsigned char *bitmap;
532 int ret;
534 if (!env->kvm_sw_tlb) {
535 return;
538 bitmap = g_malloc((env->nb_tlb + 7) / 8);
539 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
541 dirty_tlb.bitmap = (uintptr_t)bitmap;
542 dirty_tlb.num_dirty = env->nb_tlb;
544 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
545 if (ret) {
546 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
547 __func__, strerror(-ret));
550 g_free(bitmap);
553 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
555 PowerPCCPU *cpu = POWERPC_CPU(cs);
556 CPUPPCState *env = &cpu->env;
557 union {
558 uint32_t u32;
559 uint64_t u64;
560 } val;
561 struct kvm_one_reg reg = {
562 .id = id,
563 .addr = (uintptr_t) &val,
565 int ret;
567 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
568 if (ret != 0) {
569 trace_kvm_failed_spr_get(spr, strerror(errno));
570 } else {
571 switch (id & KVM_REG_SIZE_MASK) {
572 case KVM_REG_SIZE_U32:
573 env->spr[spr] = val.u32;
574 break;
576 case KVM_REG_SIZE_U64:
577 env->spr[spr] = val.u64;
578 break;
580 default:
581 /* Don't handle this size yet */
582 abort();
587 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
589 PowerPCCPU *cpu = POWERPC_CPU(cs);
590 CPUPPCState *env = &cpu->env;
591 union {
592 uint32_t u32;
593 uint64_t u64;
594 } val;
595 struct kvm_one_reg reg = {
596 .id = id,
597 .addr = (uintptr_t) &val,
599 int ret;
601 switch (id & KVM_REG_SIZE_MASK) {
602 case KVM_REG_SIZE_U32:
603 val.u32 = env->spr[spr];
604 break;
606 case KVM_REG_SIZE_U64:
607 val.u64 = env->spr[spr];
608 break;
610 default:
611 /* Don't handle this size yet */
612 abort();
615 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
616 if (ret != 0) {
617 trace_kvm_failed_spr_set(spr, strerror(errno));
621 static int kvm_put_fp(CPUState *cs)
623 PowerPCCPU *cpu = POWERPC_CPU(cs);
624 CPUPPCState *env = &cpu->env;
625 struct kvm_one_reg reg;
626 int i;
627 int ret;
629 if (env->insns_flags & PPC_FLOAT) {
630 uint64_t fpscr = env->fpscr;
631 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
633 reg.id = KVM_REG_PPC_FPSCR;
634 reg.addr = (uintptr_t)&fpscr;
635 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
636 if (ret < 0) {
637 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
638 return ret;
641 for (i = 0; i < 32; i++) {
642 uint64_t vsr[2];
644 vsr[0] = float64_val(env->fpr[i]);
645 vsr[1] = env->vsr[i];
646 reg.addr = (uintptr_t) &vsr;
647 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
649 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
650 if (ret < 0) {
651 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
652 i, strerror(errno));
653 return ret;
658 if (env->insns_flags & PPC_ALTIVEC) {
659 reg.id = KVM_REG_PPC_VSCR;
660 reg.addr = (uintptr_t)&env->vscr;
661 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
662 if (ret < 0) {
663 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
664 return ret;
667 for (i = 0; i < 32; i++) {
668 reg.id = KVM_REG_PPC_VR(i);
669 reg.addr = (uintptr_t)&env->avr[i];
670 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
671 if (ret < 0) {
672 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
673 return ret;
678 return 0;
681 static int kvm_get_fp(CPUState *cs)
683 PowerPCCPU *cpu = POWERPC_CPU(cs);
684 CPUPPCState *env = &cpu->env;
685 struct kvm_one_reg reg;
686 int i;
687 int ret;
689 if (env->insns_flags & PPC_FLOAT) {
690 uint64_t fpscr;
691 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
693 reg.id = KVM_REG_PPC_FPSCR;
694 reg.addr = (uintptr_t)&fpscr;
695 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
696 if (ret < 0) {
697 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
698 return ret;
699 } else {
700 env->fpscr = fpscr;
703 for (i = 0; i < 32; i++) {
704 uint64_t vsr[2];
706 reg.addr = (uintptr_t) &vsr;
707 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
709 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
710 if (ret < 0) {
711 DPRINTF("Unable to get %s%d from KVM: %s\n",
712 vsx ? "VSR" : "FPR", i, strerror(errno));
713 return ret;
714 } else {
715 env->fpr[i] = vsr[0];
716 if (vsx) {
717 env->vsr[i] = vsr[1];
723 if (env->insns_flags & PPC_ALTIVEC) {
724 reg.id = KVM_REG_PPC_VSCR;
725 reg.addr = (uintptr_t)&env->vscr;
726 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
727 if (ret < 0) {
728 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
729 return ret;
732 for (i = 0; i < 32; i++) {
733 reg.id = KVM_REG_PPC_VR(i);
734 reg.addr = (uintptr_t)&env->avr[i];
735 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
736 if (ret < 0) {
737 DPRINTF("Unable to get VR%d from KVM: %s\n",
738 i, strerror(errno));
739 return ret;
744 return 0;
747 #if defined(TARGET_PPC64)
748 static int kvm_get_vpa(CPUState *cs)
750 PowerPCCPU *cpu = POWERPC_CPU(cs);
751 CPUPPCState *env = &cpu->env;
752 struct kvm_one_reg reg;
753 int ret;
755 reg.id = KVM_REG_PPC_VPA_ADDR;
756 reg.addr = (uintptr_t)&env->vpa_addr;
757 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
758 if (ret < 0) {
759 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
760 return ret;
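    /* Note (an assumption drawn from the asserts below, not stated in the
     * original source): the SLB shadow and DTL registrations are each
     * transferred to/from KVM as a single value covering both the address
     * and the size field, which is why each size member must sit directly
     * after its address member in CPUPPCState. */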
763 assert((uintptr_t)&env->slb_shadow_size
764 == ((uintptr_t)&env->slb_shadow_addr + 8));
765 reg.id = KVM_REG_PPC_VPA_SLB;
766 reg.addr = (uintptr_t)&env->slb_shadow_addr;
767 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
768 if (ret < 0) {
769 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
770 strerror(errno));
771 return ret;
774 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
775 reg.id = KVM_REG_PPC_VPA_DTL;
776 reg.addr = (uintptr_t)&env->dtl_addr;
777 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
778 if (ret < 0) {
779 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
780 strerror(errno));
781 return ret;
784 return 0;
787 static int kvm_put_vpa(CPUState *cs)
789 PowerPCCPU *cpu = POWERPC_CPU(cs);
790 CPUPPCState *env = &cpu->env;
791 struct kvm_one_reg reg;
792 int ret;
794 /* SLB shadow or DTL can't be registered unless a master VPA is
795 * registered. That means when restoring state, if a VPA *is*
796 * registered, we need to set that up first. If not, we need to
797 * deregister the others before deregistering the master VPA */
798 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
800 if (env->vpa_addr) {
801 reg.id = KVM_REG_PPC_VPA_ADDR;
802 reg.addr = (uintptr_t)&env->vpa_addr;
803 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
804 if (ret < 0) {
805 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
806 return ret;
810 assert((uintptr_t)&env->slb_shadow_size
811 == ((uintptr_t)&env->slb_shadow_addr + 8));
812 reg.id = KVM_REG_PPC_VPA_SLB;
813 reg.addr = (uintptr_t)&env->slb_shadow_addr;
814 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
815 if (ret < 0) {
816 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
817 return ret;
820 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
821 reg.id = KVM_REG_PPC_VPA_DTL;
822 reg.addr = (uintptr_t)&env->dtl_addr;
823 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
824 if (ret < 0) {
825 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
826 strerror(errno));
827 return ret;
830 if (!env->vpa_addr) {
831 reg.id = KVM_REG_PPC_VPA_ADDR;
832 reg.addr = (uintptr_t)&env->vpa_addr;
833 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
834 if (ret < 0) {
835 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
836 return ret;
840 return 0;
842 #endif /* TARGET_PPC64 */
844 int kvm_arch_put_registers(CPUState *cs, int level)
846 PowerPCCPU *cpu = POWERPC_CPU(cs);
847 CPUPPCState *env = &cpu->env;
848 struct kvm_regs regs;
849 int ret;
850 int i;
852 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
853 if (ret < 0) {
854 return ret;
857 regs.ctr = env->ctr;
858 regs.lr = env->lr;
859 regs.xer = cpu_read_xer(env);
860 regs.msr = env->msr;
861 regs.pc = env->nip;
863 regs.srr0 = env->spr[SPR_SRR0];
864 regs.srr1 = env->spr[SPR_SRR1];
866 regs.sprg0 = env->spr[SPR_SPRG0];
867 regs.sprg1 = env->spr[SPR_SPRG1];
868 regs.sprg2 = env->spr[SPR_SPRG2];
869 regs.sprg3 = env->spr[SPR_SPRG3];
870 regs.sprg4 = env->spr[SPR_SPRG4];
871 regs.sprg5 = env->spr[SPR_SPRG5];
872 regs.sprg6 = env->spr[SPR_SPRG6];
873 regs.sprg7 = env->spr[SPR_SPRG7];
875 regs.pid = env->spr[SPR_BOOKE_PID];
877 for (i = 0;i < 32; i++)
878 regs.gpr[i] = env->gpr[i];
880 regs.cr = 0;
881 for (i = 0; i < 8; i++) {
882 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
885 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
886 if (ret < 0)
887 return ret;
889 kvm_put_fp(cs);
891 if (env->tlb_dirty) {
892 kvm_sw_tlb_put(cpu);
893 env->tlb_dirty = false;
896 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
897 struct kvm_sregs sregs;
899 sregs.pvr = env->spr[SPR_PVR];
901 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
903 /* Sync SLB */
904 #ifdef TARGET_PPC64
905 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
906 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
907 if (env->slb[i].esid & SLB_ESID_V) {
908 sregs.u.s.ppc64.slb[i].slbe |= i;
910 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
912 #endif
914 /* Sync SRs */
915 for (i = 0; i < 16; i++) {
916 sregs.u.s.ppc32.sr[i] = env->sr[i];
919 /* Sync BATs */
920 for (i = 0; i < 8; i++) {
921 /* Beware. We have to swap upper and lower bits here */
922 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
923 | env->DBAT[1][i];
924 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
925 | env->IBAT[1][i];
928 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
929 if (ret) {
930 return ret;
934 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
935 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
938 if (cap_one_reg) {
939 int i;
941 /* We deliberately ignore errors here, for kernels which have
942 * the ONE_REG calls, but don't support the specific
943 * registers, there's a reasonable chance things will still
944 * work, at least until we try to migrate. */
945 for (i = 0; i < 1024; i++) {
946 uint64_t id = env->spr_cb[i].one_reg_id;
948 if (id != 0) {
949 kvm_put_one_spr(cs, id, i);
953 #ifdef TARGET_PPC64
954 if (msr_ts) {
955 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
956 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
958 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
959 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
961 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
962 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
963 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
964 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
965 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
966 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
967 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
968 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
969 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
970 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
973 if (cap_papr) {
974 if (kvm_put_vpa(cs) < 0) {
975 DPRINTF("Warning: Unable to set VPA information to KVM\n");
979 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
980 #endif /* TARGET_PPC64 */
983 return ret;
986 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
988 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
991 int kvm_arch_get_registers(CPUState *cs)
993 PowerPCCPU *cpu = POWERPC_CPU(cs);
994 CPUPPCState *env = &cpu->env;
995 struct kvm_regs regs;
996 struct kvm_sregs sregs;
997 uint32_t cr;
998 int i, ret;
1000 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1001 if (ret < 0)
1002 return ret;
1004 cr = regs.cr;
1005 for (i = 7; i >= 0; i--) {
1006 env->crf[i] = cr & 15;
1007 cr >>= 4;
1010 env->ctr = regs.ctr;
1011 env->lr = regs.lr;
1012 cpu_write_xer(env, regs.xer);
1013 env->msr = regs.msr;
1014 env->nip = regs.pc;
1016 env->spr[SPR_SRR0] = regs.srr0;
1017 env->spr[SPR_SRR1] = regs.srr1;
1019 env->spr[SPR_SPRG0] = regs.sprg0;
1020 env->spr[SPR_SPRG1] = regs.sprg1;
1021 env->spr[SPR_SPRG2] = regs.sprg2;
1022 env->spr[SPR_SPRG3] = regs.sprg3;
1023 env->spr[SPR_SPRG4] = regs.sprg4;
1024 env->spr[SPR_SPRG5] = regs.sprg5;
1025 env->spr[SPR_SPRG6] = regs.sprg6;
1026 env->spr[SPR_SPRG7] = regs.sprg7;
1028 env->spr[SPR_BOOKE_PID] = regs.pid;
1030 for (i = 0;i < 32; i++)
1031 env->gpr[i] = regs.gpr[i];
1033 kvm_get_fp(cs);
1035 if (cap_booke_sregs) {
1036 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1037 if (ret < 0) {
1038 return ret;
1041 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1042 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1043 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1044 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1045 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1046 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1047 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1048 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1049 env->spr[SPR_DECR] = sregs.u.e.dec;
1050 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1051 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1052 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1055 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1056 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1057 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1058 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1059 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1060 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1063 if (sregs.u.e.features & KVM_SREGS_E_64) {
1064 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1067 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1068 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1071 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1072 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1073 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1074 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1075 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1076 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1077 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1078 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1079 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1080 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1081 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1082 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1083 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1084 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1085 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1086 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1087 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1088 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1089 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1090 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1091 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1092 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1093 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1094 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1095 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1096 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1097 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1098 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1099 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1100 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1101 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1102 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1103 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1105 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1106 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1107 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1108 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1109 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1110 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1111 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1114 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1115 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1116 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1119 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1120 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1121 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1122 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1123 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1127 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1128 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1129 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1130 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1131 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1132 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1133 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1134 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1135 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1136 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1137 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1140 if (sregs.u.e.features & KVM_SREGS_EXP) {
1141 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1144 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1145 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1146 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1149 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1150 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1151 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1152 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1154 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1155 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1156 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1161 if (cap_segstate) {
1162 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1163 if (ret < 0) {
1164 return ret;
1167 if (!env->external_htab) {
1168 ppc_store_sdr1(env, sregs.u.s.sdr1);
1171 /* Sync SLB */
1172 #ifdef TARGET_PPC64
1174 * The packed SLB array we get from KVM_GET_SREGS only contains
1175 * information about valid entries. So we flush our internal
1176 * copy to get rid of stale ones, then put all valid SLB entries
1177 * back in.
1179 memset(env->slb, 0, sizeof(env->slb));
1180 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1181 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1182 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1184 * Only restore valid entries
1186 if (rb & SLB_ESID_V) {
1187 ppc_store_slb(env, rb, rs);
1190 #endif
1192 /* Sync SRs */
1193 for (i = 0; i < 16; i++) {
1194 env->sr[i] = sregs.u.s.ppc32.sr[i];
1197 /* Sync BATs */
1198 for (i = 0; i < 8; i++) {
1199 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1200 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1201 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1202 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1206 if (cap_hior) {
1207 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1210 if (cap_one_reg) {
1211 int i;
1213 /* We deliberately ignore errors here, for kernels which have
1214 * the ONE_REG calls, but don't support the specific
1215 * registers, there's a reasonable chance things will still
1216 * work, at least until we try to migrate. */
1217 for (i = 0; i < 1024; i++) {
1218 uint64_t id = env->spr_cb[i].one_reg_id;
1220 if (id != 0) {
1221 kvm_get_one_spr(cs, id, i);
1225 #ifdef TARGET_PPC64
1226 if (msr_ts) {
1227 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1228 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1230 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1231 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1233 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1234 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1235 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1236 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1237 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1238 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1239 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1240 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1241 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1242 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1245 if (cap_papr) {
1246 if (kvm_get_vpa(cs) < 0) {
1247 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1251 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1252 #endif
1255 return 0;
1258 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1260 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1262 if (irq != PPC_INTERRUPT_EXT) {
1263 return 0;
1266 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1267 return 0;
1270 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1272 return 0;
1275 #if defined(TARGET_PPCEMB)
1276 #define PPC_INPUT_INT PPC40x_INPUT_INT
1277 #elif defined(TARGET_PPC64)
1278 #define PPC_INPUT_INT PPC970_INPUT_INT
1279 #else
1280 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1281 #endif
1283 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1285 PowerPCCPU *cpu = POWERPC_CPU(cs);
1286 CPUPPCState *env = &cpu->env;
1287 int r;
1288 unsigned irq;
1290 qemu_mutex_lock_iothread();
1292 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1293 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1294 if (!cap_interrupt_level &&
1295 run->ready_for_interrupt_injection &&
1296 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1297 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1299 /* For now KVM disregards the 'irq' argument. However, in the
1300 * future KVM could cache it in-kernel to avoid a heavyweight exit
1301 * when reading the UIC.
1303 irq = KVM_INTERRUPT_SET;
1305 DPRINTF("injected interrupt %d\n", irq);
1306 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1307 if (r < 0) {
1308 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1311 /* Always wake up soon in case the interrupt was level based */
1312 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1313 (get_ticks_per_sec() / 50));
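        /* get_ticks_per_sec() is the nanosecond clock rate (10^9), so
         * dividing by 50 yields the 20 ms re-injection delay described in
         * the comment above idle_timer. */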
1316 /* We don't know if there are more interrupts pending after this. However,
1317 * the guest will return to userspace in the course of handling this one
1318 * anyways, so we will get a chance to deliver the rest. */
1320 qemu_mutex_unlock_iothread();
1323 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1325 return MEMTXATTRS_UNSPECIFIED;
1328 int kvm_arch_process_async_events(CPUState *cs)
1330 return cs->halted;
1333 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1335 CPUState *cs = CPU(cpu);
1336 CPUPPCState *env = &cpu->env;
1338 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1339 cs->halted = 1;
1340 cs->exception_index = EXCP_HLT;
1343 return 0;
1346 /* map dcr access to existing qemu dcr emulation */
1347 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1349 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1350 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1352 return 0;
1355 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1357 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1358 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1360 return 0;
1363 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1365 /* Mixed endian case is not handled */
1366 uint32_t sc = debug_inst_opcode;
1368 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1369 sizeof(sc), 0) ||
1370 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1371 return -EINVAL;
1374 return 0;
1377 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1379 uint32_t sc;
1381 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1382 sc != debug_inst_opcode ||
1383 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1384 sizeof(sc), 1)) {
1385 return -EINVAL;
1388 return 0;
1391 static int find_hw_breakpoint(target_ulong addr, int type)
1393 int n;
1395 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1396 <= ARRAY_SIZE(hw_debug_points));
1398 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1399 if (hw_debug_points[n].addr == addr &&
1400 hw_debug_points[n].type == type) {
1401 return n;
1405 return -1;
1408 static int find_hw_watchpoint(target_ulong addr, int *flag)
1410 int n;
1412 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1413 if (n >= 0) {
1414 *flag = BP_MEM_ACCESS;
1415 return n;
1418 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1419 if (n >= 0) {
1420 *flag = BP_MEM_WRITE;
1421 return n;
1424 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1425 if (n >= 0) {
1426 *flag = BP_MEM_READ;
1427 return n;
1430 return -1;
1433 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1434 target_ulong len, int type)
1436 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1437 return -ENOBUFS;
1440 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1441 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1443 switch (type) {
1444 case GDB_BREAKPOINT_HW:
1445 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1446 return -ENOBUFS;
1449 if (find_hw_breakpoint(addr, type) >= 0) {
1450 return -EEXIST;
1453 nb_hw_breakpoint++;
1454 break;
1456 case GDB_WATCHPOINT_WRITE:
1457 case GDB_WATCHPOINT_READ:
1458 case GDB_WATCHPOINT_ACCESS:
1459 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1460 return -ENOBUFS;
1463 if (find_hw_breakpoint(addr, type) >= 0) {
1464 return -EEXIST;
1467 nb_hw_watchpoint++;
1468 break;
1470 default:
1471 return -ENOSYS;
1474 return 0;
1477 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1478 target_ulong len, int type)
1480 int n;
1482 n = find_hw_breakpoint(addr, type);
1483 if (n < 0) {
1484 return -ENOENT;
1487 switch (type) {
1488 case GDB_BREAKPOINT_HW:
1489 nb_hw_breakpoint--;
1490 break;
1492 case GDB_WATCHPOINT_WRITE:
1493 case GDB_WATCHPOINT_READ:
1494 case GDB_WATCHPOINT_ACCESS:
1495 nb_hw_watchpoint--;
1496 break;
1498 default:
1499 return -ENOSYS;
1501 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1503 return 0;
1506 void kvm_arch_remove_all_hw_breakpoints(void)
1508 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1511 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1513 int n;
1515 /* Software Breakpoint updates */
1516 if (kvm_sw_breakpoints_active(cs)) {
1517 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1520 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1521 <= ARRAY_SIZE(hw_debug_points));
1522 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1524 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1525 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1526 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1527 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1528 switch (hw_debug_points[n].type) {
1529 case GDB_BREAKPOINT_HW:
1530 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1531 break;
1532 case GDB_WATCHPOINT_WRITE:
1533 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1534 break;
1535 case GDB_WATCHPOINT_READ:
1536 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1537 break;
1538 case GDB_WATCHPOINT_ACCESS:
1539 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1540 KVMPPC_DEBUG_WATCH_READ;
1541 break;
1542 default:
1543 cpu_abort(cs, "Unsupported breakpoint type\n");
1545 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1550 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1552 CPUState *cs = CPU(cpu);
1553 CPUPPCState *env = &cpu->env;
1554 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1555 int handle = 0;
1556 int n;
1557 int flag = 0;
1559 if (cs->singlestep_enabled) {
1560 handle = 1;
1561 } else if (arch_info->status) {
1562 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1563 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1564 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1565 if (n >= 0) {
1566 handle = 1;
1568 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1569 KVMPPC_DEBUG_WATCH_WRITE)) {
1570 n = find_hw_watchpoint(arch_info->address, &flag);
1571 if (n >= 0) {
1572 handle = 1;
1573 cs->watchpoint_hit = &hw_watchpoint;
1574 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1575 hw_watchpoint.flags = flag;
1579 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1580 handle = 1;
1581 } else {
        /* QEMU is not able to handle this debug exception, so inject a
         * program exception into the guest;
         * Yes, a program exception, NOT a debug exception !!
         * When QEMU is using debug resources then the debug exception must
         * always be set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so the guest cannot change MSR_DE.
         * When emulating debug resources for the guest we want the guest
         * to control MSR_DE (enable/disable the debug interrupt on need).
         * Supporting both configurations at the same time is NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and the guest on BOOKE architecture.
         * In the current design QEMU gets priority over the guest,
         * which means that if QEMU is using debug resources then the guest
         * cannot use them;
         * For software breakpoints QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here because the guest
         * set a debug exception, the only possibility is that the guest
         * executed a privileged / illegal instruction and that is why we
         * are injecting a program interrupt.
         */
1603 cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
1607 env->nip += 4;
1608 cs->exception_index = POWERPC_EXCP_PROGRAM;
1609 env->error_code = POWERPC_EXCP_INVAL;
1610 ppc_cpu_do_interrupt(cs);
1613 return handle;
1616 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1618 PowerPCCPU *cpu = POWERPC_CPU(cs);
1619 CPUPPCState *env = &cpu->env;
1620 int ret;
1622 qemu_mutex_lock_iothread();
1624 switch (run->exit_reason) {
1625 case KVM_EXIT_DCR:
1626 if (run->dcr.is_write) {
1627 DPRINTF("handle dcr write\n");
1628 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1629 } else {
1630 DPRINTF("handle dcr read\n");
1631 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1633 break;
1634 case KVM_EXIT_HLT:
1635 DPRINTF("handle halt\n");
1636 ret = kvmppc_handle_halt(cpu);
1637 break;
1638 #if defined(TARGET_PPC64)
1639 case KVM_EXIT_PAPR_HCALL:
1640 DPRINTF("handle PAPR hypercall\n");
1641 run->papr_hcall.ret = spapr_hypercall(cpu,
1642 run->papr_hcall.nr,
1643 run->papr_hcall.args);
1644 ret = 0;
1645 break;
1646 #endif
1647 case KVM_EXIT_EPR:
1648 DPRINTF("handle epr\n");
1649 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1650 ret = 0;
1651 break;
1652 case KVM_EXIT_WATCHDOG:
1653 DPRINTF("handle watchdog expiry\n");
1654 watchdog_perform_action();
1655 ret = 0;
1656 break;
1658 case KVM_EXIT_DEBUG:
1659 DPRINTF("handle debug exception\n");
1660 if (kvm_handle_debug(cpu, run)) {
1661 ret = EXCP_DEBUG;
1662 break;
1664 /* re-enter, this exception was guest-internal */
1665 ret = 0;
1666 break;
1668 default:
1669 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1670 ret = -1;
1671 break;
1674 qemu_mutex_unlock_iothread();
1675 return ret;
1678 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1680 CPUState *cs = CPU(cpu);
1681 uint32_t bits = tsr_bits;
1682 struct kvm_one_reg reg = {
1683 .id = KVM_REG_PPC_OR_TSR,
1684 .addr = (uintptr_t) &bits,
1687 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1690 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1693 CPUState *cs = CPU(cpu);
1694 uint32_t bits = tsr_bits;
1695 struct kvm_one_reg reg = {
1696 .id = KVM_REG_PPC_CLEAR_TSR,
1697 .addr = (uintptr_t) &bits,
1700 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1703 int kvmppc_set_tcr(PowerPCCPU *cpu)
1705 CPUState *cs = CPU(cpu);
1706 CPUPPCState *env = &cpu->env;
1707 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1709 struct kvm_one_reg reg = {
1710 .id = KVM_REG_PPC_TCR,
1711 .addr = (uintptr_t) &tcr,
1714 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1717 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1719 CPUState *cs = CPU(cpu);
1720 int ret;
1722 if (!kvm_enabled()) {
1723 return -1;
1726 if (!cap_ppc_watchdog) {
1727 printf("warning: KVM does not support watchdog");
1728 return -1;
1731 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1732 if (ret < 0) {
1733 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1734 __func__, strerror(-ret));
1735 return ret;
1738 return ret;
1741 static int read_cpuinfo(const char *field, char *value, int len)
1743 FILE *f;
1744 int ret = -1;
1745 int field_len = strlen(field);
1746 char line[512];
1748 f = fopen("/proc/cpuinfo", "r");
1749 if (!f) {
1750 return -1;
1753 do {
1754 if (!fgets(line, sizeof(line), f)) {
1755 break;
1757 if (!strncmp(line, field, field_len)) {
1758 pstrcpy(value, len, line);
1759 ret = 0;
1760 break;
1762 } while(*line);
1764 fclose(f);
1766 return ret;
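/* kvmppc_get_tbfreq() below extracts the host timebase frequency from
 * /proc/cpuinfo; on PowerPC hosts this is typically a line of the form
 * "timebase : 512000000" (value in Hz). If the field cannot be parsed,
 * it falls back to get_ticks_per_sec(). */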
1769 uint32_t kvmppc_get_tbfreq(void)
1771 char line[512];
1772 char *ns;
1773 uint32_t retval = get_ticks_per_sec();
1775 if (read_cpuinfo("timebase", line, sizeof(line))) {
1776 return retval;
1779 if (!(ns = strchr(line, ':'))) {
1780 return retval;
1783 ns++;
1785 return atoi(ns);
1788 bool kvmppc_get_host_serial(char **value)
1790 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1791 NULL);
1794 bool kvmppc_get_host_model(char **value)
1796 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1799 /* Try to find a device tree node for a CPU with clock-frequency property */
1800 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1802 struct dirent *dirp;
1803 DIR *dp;
1805 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1806 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1807 return -1;
1810 buf[0] = '\0';
1811 while ((dirp = readdir(dp)) != NULL) {
1812 FILE *f;
1813 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1814 dirp->d_name);
1815 f = fopen(buf, "r");
1816 if (f) {
1817 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1818 fclose(f);
1819 break;
1821 buf[0] = '\0';
1823 closedir(dp);
1824 if (buf[0] == '\0') {
1825 printf("Unknown host!\n");
1826 return -1;
1829 return 0;
1832 /* Read a CPU node property from the host device tree that's a single
1833 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1834 * (can't find or open the property, or doesn't understand the
1835 * format) */
1836 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1838 char buf[PATH_MAX], *tmp;
1839 union {
1840 uint32_t v32;
1841 uint64_t v64;
1842 } u;
1843 FILE *f;
1844 int len;
1846 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1847 return -1;
1850 tmp = g_strdup_printf("%s/%s", buf, propname);
1852 f = fopen(tmp, "rb");
1853 g_free(tmp);
1854 if (!f) {
1855 return -1;
1858 len = fread(&u, 1, sizeof(u), f);
1859 fclose(f);
1860 switch (len) {
1861 case 4:
1862 /* property is a 32-bit quantity */
1863 return be32_to_cpu(u.v32);
1864 case 8:
1865 return be64_to_cpu(u.v64);
1868 return 0;
1871 uint64_t kvmppc_get_clockfreq(void)
1873 return kvmppc_read_int_cpu_dt("clock-frequency");
1876 uint32_t kvmppc_get_vmx(void)
1878 return kvmppc_read_int_cpu_dt("ibm,vmx");
1881 uint32_t kvmppc_get_dfp(void)
1883 return kvmppc_read_int_cpu_dt("ibm,dfp");
1886 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1888 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1889 CPUState *cs = CPU(cpu);
1891 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1892 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1893 return 0;
1896 return 1;
1899 int kvmppc_get_hasidle(CPUPPCState *env)
1901 struct kvm_ppc_pvinfo pvinfo;
1903 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1904 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1905 return 1;
1908 return 0;
1911 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1913 uint32_t *hc = (uint32_t*)buf;
1914 struct kvm_ppc_pvinfo pvinfo;
1916 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1917 memcpy(buf, pvinfo.hcall, buf_len);
1918 return 0;
1922 * Fallback to always fail hypercalls regardless of endianness:
1924 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1925 * li r3, -1
1926 * b .+8 (becomes nop in wrong endian)
1927 * bswap32(li r3, -1)
1930 hc[0] = cpu_to_be32(0x08000048);
1931 hc[1] = cpu_to_be32(0x3860ffff);
1932 hc[2] = cpu_to_be32(0x48000008);
1933 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1935 return 0;
1938 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1940 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1943 void kvmppc_enable_logical_ci_hcalls(void)
1946 * FIXME: it would be nice if we could detect the cases where
1947 * we're using a device which requires the in kernel
1948 * implementation of these hcalls, but the kernel lacks them and
1949 * produce a warning.
1951 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1952 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1955 void kvmppc_enable_set_mode_hcall(void)
1957 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1960 void kvmppc_set_papr(PowerPCCPU *cpu)
1962 CPUState *cs = CPU(cpu);
1963 int ret;
1965 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1966 if (ret) {
1967 cpu_abort(cs, "This KVM version does not support PAPR\n");
1970 /* Update the capability flag so we sync the right information
1971 * with kvm */
1972 cap_papr = 1;
1975 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1977 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1980 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1982 CPUState *cs = CPU(cpu);
1983 int ret;
1985 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1986 if (ret && mpic_proxy) {
1987 cpu_abort(cs, "This KVM version does not support EPR\n");
1991 int kvmppc_smt_threads(void)
1993 return cap_ppc_smt ? cap_ppc_smt : 1;
1996 #ifdef TARGET_PPC64
1997 off_t kvmppc_alloc_rma(void **rma)
1999 off_t size;
2000 int fd;
2001 struct kvm_allocate_rma ret;
2003 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2004 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2005 * not necessary on this hardware
2006 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2008 * FIXME: We should allow the user to force contiguous RMA
2009 * allocation in the cap_ppc_rma==1 case.
2011 if (cap_ppc_rma < 2) {
2012 return 0;
2015 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2016 if (fd < 0) {
2017 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2018 strerror(errno));
2019 return -1;
2022 size = MIN(ret.rma_size, 256ul << 20);
2024 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2025 if (*rma == MAP_FAILED) {
2026 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2027 return -1;
2030 return size;
2033 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2035 struct kvm_ppc_smmu_info info;
2036 long rampagesize, best_page_shift;
2037 int i;
2039 if (cap_ppc_rma >= 2) {
2040 return current_size;
2043 /* Find the largest hardware supported page size that's less than
2044 * or equal to the (logical) backing page size of guest RAM */
2045 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2046 rampagesize = getrampagesize();
2047 best_page_shift = 0;
2049 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2050 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2052 if (!sps->page_shift) {
2053 continue;
2056 if ((sps->page_shift > best_page_shift)
2057 && ((1UL << sps->page_shift) <= rampagesize)) {
2058 best_page_shift = sps->page_shift;
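    /* Sketch of the bound computed below (an assumption, not taken from the
     * original source): a hash table of 2^hash_shift bytes holds
     * 2^(hash_shift - 7) HPTE groups of 128 bytes each, and the kernel maps
     * the real-mode area with roughly one entry per group, which caps the
     * RMA at 2^(best_page_shift + hash_shift - 7) bytes. */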
2062 return MIN(current_size,
2063 1ULL << (best_page_shift + hash_shift - 7));
2065 #endif
2067 bool kvmppc_spapr_use_multitce(void)
2069 return cap_spapr_multitce;
2072 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2073 bool vfio_accel)
2075 struct kvm_create_spapr_tce args = {
2076 .liobn = liobn,
2077 .window_size = window_size,
2079 long len;
2080 int fd;
2081 void *table;
2083 /* Must set fd to -1 so we don't try to munmap when called for
2084 * destroying the table, which the upper layers -will- do
2086 *pfd = -1;
2087 if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
2088 return NULL;
2091 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2092 if (fd < 0) {
2093 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2094 liobn);
2095 return NULL;
2098 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2099 /* FIXME: round this up to page size */
2101 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2102 if (table == MAP_FAILED) {
2103 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2104 liobn);
2105 close(fd);
2106 return NULL;
2109 *pfd = fd;
2110 return table;
2113 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2115 long len;
2117 if (fd < 0) {
2118 return -1;
2121 len = nb_table * sizeof(uint64_t);
2122 if ((munmap(table, len) < 0) ||
2123 (close(fd) < 0)) {
2124 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2125 strerror(errno));
2126 /* Leak the table */
2129 return 0;
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already.  Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}
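
/*
 * Fix up the "host" CPU class with whatever we can query from the machine
 * we are running on: the exact PVR, AltiVec/VSX and DFP availability, and
 * the L1 cache sizes from the device tree.  A value of -1 from the helpers
 * means the property could not be determined, in which case the defaults
 * inherited from the PVR-matched parent class are left untouched.
 */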
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
    dc->cannot_destroy_with_object_finalize_yet = true;
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}
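
/*
 * Register the "host" CPU type.  Its parent is the existing CPU class that
 * matches the host PVR (exactly if possible, otherwise by mask), so
 * "-cpu host" picks up the host's instruction set and features.  A second,
 * generic alias type is also registered for the CPU family, named after the
 * family class description.
 */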
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .instance_init = kvmppc_host_cpu_initfn,
        .class_init = kvmppc_host_cpu_class_init,
    };
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;
    DeviceClass *dc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

    /* Register generic family CPU class for a family */
    pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
    dc = DEVICE_CLASS(pvr_pcc);
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
    type_register(&type_info);

    return 0;
}
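
/*
 * Tell KVM to handle the given RTAS call in the kernel: the token the guest
 * will use is bound to the named RTAS function via KVM_PPC_RTAS_DEFINE_TOKEN.
 * Returns -ENOENT if the kernel has no in-kernel RTAS support.
 */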
int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
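
/*
 * Obtain a file descriptor onto the guest hash table, used by migration:
 * reading it streams out the current HPTEs, while a descriptor opened with
 * KVM_GET_HTAB_WRITE lets QEMU restore entries on the destination.
 */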
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}
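
/*
 * Drain HPTEs from the HTAB fd into the migration stream until either the
 * kernel reports end-of-table (read() returns 0) or max_ns has elapsed.
 * Each read() is expected to return whole chunks: a kvm_get_htab_header
 * followed by n_valid HPTEs of HASH_PTE_SIZE_64 bytes, which is why the
 * inner loop can walk the buffer in chunksize steps.  Returns 1 once the
 * whole table has been sent, 0 when interrupted by the time limit.
 */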
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}
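
/*
 * Counterpart of kvmppc_save_htab() on the destination: rebuild one chunk
 * (header plus n_valid HPTEs pulled from the migration stream) and push it
 * into the kernel through the writable HTAB fd.  A short write is treated
 * as an error since a chunk is written in a single call.
 */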
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}

int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
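
/*
 * Read one PTE group from the kernel's hash table.  A temporary HTAB fd is
 * opened positioned at pte_index, one kvm_get_htab_buf worth of data is
 * read into a freshly allocated buffer, and the address of its hpte[]
 * array is handed back as an opaque token (0 on failure).  The caller
 * releases the buffer with kvmppc_hash64_free_pteg().
 */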
uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}

void kvmppc_hash64_free_pteg(uint64_t token)
{
    struct kvm_get_htab_buf *htab_buf;

    htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
                            hpte);
    g_free(htab_buf);
    return;
}
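
/*
 * Update a single HPTE in the kernel hash table: build a one-entry chunk
 * (header with n_valid = 1 followed by the two PTE words) and write it to
 * a temporary HTAB fd.  Failures are not reported to the caller; the fd is
 * simply closed and the function returns.
 */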
void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
                             target_ulong pte0, target_ulong pte1)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf hpte_buf;

    ghf.flags = 0;
    ghf.start_index = 0;    /* Ignored */
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf.header.n_valid = 1;
    hpte_buf.header.n_invalid = 0;
    hpte_buf.header.index = pte_index;
    hpte_buf.hpte[0] = pte0;
    hpte_buf.hpte[1] = pte1;
    /*
     * Write the hpte entry.
     * CAUTION: write() has the warn_unused_result attribute. Hence we
     * need to check the return value, even though we do nothing.
     */
    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
        goto out_close;
    }

out_close:
    close(htab_fd);
    return;

error_out:
    return;
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
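
/*
 * Route the H_RANDOM hypercall to the in-kernel hardware RNG backend when
 * the host advertises KVM_CAP_PPC_HWRNG; otherwise return -1 so the caller
 * can fall back to handling H_RANDOM in QEMU.
 */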
int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}