xen-hvm: Clean up xen_hvm_init() error handling
[qemu/ar7.git] / target-ppc / kvm.c
blob9940a9046220c88fe172a493d40e2e4e7b0410e2
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
41 #include "exec/gdbstub.h"
42 #include "exec/memattrs.h"
43 #include "sysemu/hostmem.h"
/* Uncomment to make DPRINTF() emit its output on stderr. */
//#define DEBUG_KVM

/*
 * DPRINTF(): printf-style debug trace.
 *
 * With DEBUG_KVM defined it expands to an fprintf() on stderr; otherwise it
 * expands to an empty statement, so its arguments are not evaluated.
 */
#if defined(DEBUG_KVM)
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

/* procfs path of the device tree's "cpus" node. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
57 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
58 KVM_CAP_LAST_INFO
61 static int cap_interrupt_unset = false;
62 static int cap_interrupt_level = false;
63 static int cap_segstate;
64 static int cap_booke_sregs;
65 static int cap_ppc_smt;
66 static int cap_ppc_rma;
67 static int cap_spapr_tce;
68 static int cap_spapr_multitce;
69 static int cap_spapr_vfio;
70 static int cap_hior;
71 static int cap_one_reg;
72 static int cap_epr;
73 static int cap_ppc_watchdog;
74 static int cap_papr;
75 static int cap_htab_fd;
76 static int cap_fixup_hcalls;
78 static uint32_t debug_inst_opcode;
80 /* XXX We have a race condition where we actually have a level triggered
81 * interrupt, but the infrastructure can't expose that yet, so the guest
82 * takes but ignores it, goes to sleep and never gets notified that there's
83 * still an interrupt pending.
85 * As a quick workaround, let's just wake up again 20 ms after we injected
86 * an interrupt. That way we can assure that we're always reinjecting
87 * interrupts in case the guest swallowed them.
89 static QEMUTimer *idle_timer;
91 static void kvm_kick_cpu(void *opaque)
93 PowerPCCPU *cpu = opaque;
95 qemu_cpu_kick(CPU(cpu));
98 static int kvm_ppc_register_host_cpu_type(void);
100 int kvm_arch_init(MachineState *ms, KVMState *s)
102 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
103 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
104 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
105 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
106 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
107 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
108 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
109 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
110 cap_spapr_vfio = false;
111 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
112 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
113 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
114 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
115 /* Note: we don't set cap_papr here, because this capability is
116 * only activated after this by kvmppc_set_papr() */
117 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
118 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
120 if (!cap_interrupt_level) {
121 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
122 "VM to stall at times!\n");
125 kvm_ppc_register_host_cpu_type();
127 return 0;
130 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
132 CPUPPCState *cenv = &cpu->env;
133 CPUState *cs = CPU(cpu);
134 struct kvm_sregs sregs;
135 int ret;
137 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
138 /* What we're really trying to say is "if we're on BookE, we use
139 the native PVR for now". This is the only sane way to check
140 it though, so we potentially confuse users that they can run
141 BookE guests on BookS. Let's hope nobody dares enough :) */
142 return 0;
143 } else {
144 if (!cap_segstate) {
145 fprintf(stderr, "kvm error: missing PVR setting capability\n");
146 return -ENOSYS;
150 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
151 if (ret) {
152 return ret;
155 sregs.pvr = cenv->spr[SPR_PVR];
156 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
159 /* Set up a shared TLB array with KVM */
160 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
162 CPUPPCState *env = &cpu->env;
163 CPUState *cs = CPU(cpu);
164 struct kvm_book3e_206_tlb_params params = {};
165 struct kvm_config_tlb cfg = {};
166 unsigned int entries = 0;
167 int ret, i;
169 if (!kvm_enabled() ||
170 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
171 return 0;
174 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
176 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
177 params.tlb_sizes[i] = booke206_tlb_size(env, i);
178 params.tlb_ways[i] = booke206_tlb_ways(env, i);
179 entries += params.tlb_sizes[i];
182 assert(entries == env->nb_tlb);
183 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
185 env->tlb_dirty = true;
187 cfg.array = (uintptr_t)env->tlb.tlbm;
188 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
189 cfg.params = (uintptr_t)&params;
190 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
192 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
193 if (ret < 0) {
194 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
195 __func__, strerror(-ret));
196 return ret;
199 env->kvm_sw_tlb = true;
200 return 0;
204 #if defined(TARGET_PPC64)
205 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
206 struct kvm_ppc_smmu_info *info)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
211 memset(info, 0, sizeof(*info));
213 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
214 * need to "guess" what the supported page sizes are.
216 * For that to work we make a few assumptions:
218 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
219 * KVM which only supports 4K and 16M pages, but supports them
220 * regardless of the backing store characteritics. We also don't
221 * support 1T segments.
223 * This is safe as if HV KVM ever supports that capability or PR
224 * KVM grows supports for more page/segment sizes, those versions
225 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
226 * will not hit this fallback
228 * - Else we are running HV KVM. This means we only support page
229 * sizes that fit in the backing store. Additionally we only
230 * advertize 64K pages if the processor is ARCH 2.06 and we assume
231 * P7 encodings for the SLB and hash table. Here too, we assume
232 * support for any newer processor will mean a kernel that
233 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
234 * this fallback.
236 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
237 /* No flags */
238 info->flags = 0;
239 info->slb_size = 64;
241 /* Standard 4k base page size segment */
242 info->sps[0].page_shift = 12;
243 info->sps[0].slb_enc = 0;
244 info->sps[0].enc[0].page_shift = 12;
245 info->sps[0].enc[0].pte_enc = 0;
247 /* Standard 16M large page size segment */
248 info->sps[1].page_shift = 24;
249 info->sps[1].slb_enc = SLB_VSID_L;
250 info->sps[1].enc[0].page_shift = 24;
251 info->sps[1].enc[0].pte_enc = 0;
252 } else {
253 int i = 0;
255 /* HV KVM has backing store size restrictions */
256 info->flags = KVM_PPC_PAGE_SIZES_REAL;
258 if (env->mmu_model & POWERPC_MMU_1TSEG) {
259 info->flags |= KVM_PPC_1T_SEGMENTS;
262 if (env->mmu_model == POWERPC_MMU_2_06 ||
263 env->mmu_model == POWERPC_MMU_2_07) {
264 info->slb_size = 32;
265 } else {
266 info->slb_size = 64;
269 /* Standard 4k base page size segment */
270 info->sps[i].page_shift = 12;
271 info->sps[i].slb_enc = 0;
272 info->sps[i].enc[0].page_shift = 12;
273 info->sps[i].enc[0].pte_enc = 0;
274 i++;
276 /* 64K on MMU 2.06 and later */
277 if (env->mmu_model == POWERPC_MMU_2_06 ||
278 env->mmu_model == POWERPC_MMU_2_07) {
279 info->sps[i].page_shift = 16;
280 info->sps[i].slb_enc = 0x110;
281 info->sps[i].enc[0].page_shift = 16;
282 info->sps[i].enc[0].pte_enc = 1;
283 i++;
286 /* Standard 16M large page size segment */
287 info->sps[i].page_shift = 24;
288 info->sps[i].slb_enc = SLB_VSID_L;
289 info->sps[i].enc[0].page_shift = 24;
290 info->sps[i].enc[0].pte_enc = 0;
294 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
296 CPUState *cs = CPU(cpu);
297 int ret;
299 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
300 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
301 if (ret == 0) {
302 return;
306 kvm_get_fallback_smmu_info(cpu, info);
309 static long gethugepagesize(const char *mem_path)
311 struct statfs fs;
312 int ret;
314 do {
315 ret = statfs(mem_path, &fs);
316 } while (ret != 0 && errno == EINTR);
318 if (ret != 0) {
319 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
320 strerror(errno));
321 exit(1);
324 #define HUGETLBFS_MAGIC 0x958458f6
326 if (fs.f_type != HUGETLBFS_MAGIC) {
327 /* Explicit mempath, but it's ordinary pages */
328 return getpagesize();
331 /* It's hugepage, return the huge page size */
332 return fs.f_bsize;
335 static int find_max_supported_pagesize(Object *obj, void *opaque)
337 char *mem_path;
338 long *hpsize_min = opaque;
340 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
341 mem_path = object_property_get_str(obj, "mem-path", NULL);
342 if (mem_path) {
343 long hpsize = gethugepagesize(mem_path);
344 if (hpsize < *hpsize_min) {
345 *hpsize_min = hpsize;
347 } else {
348 *hpsize_min = getpagesize();
352 return 0;
355 static long getrampagesize(void)
357 long hpsize = LONG_MAX;
358 Object *memdev_root;
360 if (mem_path) {
361 return gethugepagesize(mem_path);
364 /* it's possible we have memory-backend objects with
365 * hugepage-backed RAM. these may get mapped into system
366 * address space via -numa parameters or memory hotplug
367 * hooks. we want to take these into account, but we
368 * also want to make sure these supported hugepage
369 * sizes are applicable across the entire range of memory
370 * we may boot from, so we take the min across all
371 * backends, and assume normal pages in cases where a
372 * backend isn't backed by hugepages.
374 memdev_root = object_resolve_path("/objects", NULL);
375 if (!memdev_root) {
376 return getpagesize();
379 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
381 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
384 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
386 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
387 return true;
390 return (1ul << shift) <= rampgsize;
393 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
395 static struct kvm_ppc_smmu_info smmu_info;
396 static bool has_smmu_info;
397 CPUPPCState *env = &cpu->env;
398 long rampagesize;
399 int iq, ik, jq, jk;
401 /* We only handle page sizes for 64-bit server guests for now */
402 if (!(env->mmu_model & POWERPC_MMU_64)) {
403 return;
406 /* Collect MMU info from kernel if not already */
407 if (!has_smmu_info) {
408 kvm_get_smmu_info(cpu, &smmu_info);
409 has_smmu_info = true;
412 rampagesize = getrampagesize();
414 /* Convert to QEMU form */
415 memset(&env->sps, 0, sizeof(env->sps));
417 /* If we have HV KVM, we need to forbid CI large pages if our
418 * host page size is smaller than 64K.
420 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
421 env->ci_large_pages = getpagesize() >= 0x10000;
425 * XXX This loop should be an entry wide AND of the capabilities that
426 * the selected CPU has with the capabilities that KVM supports.
428 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
429 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
430 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
432 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
433 ksps->page_shift)) {
434 continue;
436 qsps->page_shift = ksps->page_shift;
437 qsps->slb_enc = ksps->slb_enc;
438 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
439 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
440 ksps->enc[jk].page_shift)) {
441 continue;
443 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
444 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
445 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
446 break;
449 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
450 break;
453 env->slb_nr = smmu_info.slb_size;
454 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
455 env->mmu_model &= ~POWERPC_MMU_1TSEG;
458 #else /* defined (TARGET_PPC64) */
460 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
464 #endif /* !defined (TARGET_PPC64) */
466 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
468 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
471 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
472 * book3s supports only 1 watchpoint, so array size
473 * of 4 is sufficient for now.
475 #define MAX_HW_BKPTS 4
477 static struct HWBreakpoint {
478 target_ulong addr;
479 int type;
480 } hw_debug_points[MAX_HW_BKPTS];
482 static CPUWatchpoint hw_watchpoint;
484 /* Default there is no breakpoint and watchpoint supported */
485 static int max_hw_breakpoint;
486 static int max_hw_watchpoint;
487 static int nb_hw_breakpoint;
488 static int nb_hw_watchpoint;
490 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
492 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
493 max_hw_breakpoint = 2;
494 max_hw_watchpoint = 2;
497 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
498 fprintf(stderr, "Error initializing h/w breakpoints\n");
499 return;
503 int kvm_arch_init_vcpu(CPUState *cs)
505 PowerPCCPU *cpu = POWERPC_CPU(cs);
506 CPUPPCState *cenv = &cpu->env;
507 int ret;
509 /* Gather server mmu info from KVM and update the CPU state */
510 kvm_fixup_page_sizes(cpu);
512 /* Synchronize sregs with kvm */
513 ret = kvm_arch_sync_sregs(cpu);
514 if (ret) {
515 return ret;
518 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
520 /* Some targets support access to KVM's guest TLB. */
521 switch (cenv->mmu_model) {
522 case POWERPC_MMU_BOOKE206:
523 ret = kvm_booke206_tlb_init(cpu);
524 break;
525 default:
526 break;
529 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
530 kvmppc_hw_debug_points_init(cenv);
532 return ret;
535 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
537 CPUPPCState *env = &cpu->env;
538 CPUState *cs = CPU(cpu);
539 struct kvm_dirty_tlb dirty_tlb;
540 unsigned char *bitmap;
541 int ret;
543 if (!env->kvm_sw_tlb) {
544 return;
547 bitmap = g_malloc((env->nb_tlb + 7) / 8);
548 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
550 dirty_tlb.bitmap = (uintptr_t)bitmap;
551 dirty_tlb.num_dirty = env->nb_tlb;
553 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
554 if (ret) {
555 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
556 __func__, strerror(-ret));
559 g_free(bitmap);
562 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
564 PowerPCCPU *cpu = POWERPC_CPU(cs);
565 CPUPPCState *env = &cpu->env;
566 union {
567 uint32_t u32;
568 uint64_t u64;
569 } val;
570 struct kvm_one_reg reg = {
571 .id = id,
572 .addr = (uintptr_t) &val,
574 int ret;
576 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
577 if (ret != 0) {
578 trace_kvm_failed_spr_get(spr, strerror(errno));
579 } else {
580 switch (id & KVM_REG_SIZE_MASK) {
581 case KVM_REG_SIZE_U32:
582 env->spr[spr] = val.u32;
583 break;
585 case KVM_REG_SIZE_U64:
586 env->spr[spr] = val.u64;
587 break;
589 default:
590 /* Don't handle this size yet */
591 abort();
596 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
598 PowerPCCPU *cpu = POWERPC_CPU(cs);
599 CPUPPCState *env = &cpu->env;
600 union {
601 uint32_t u32;
602 uint64_t u64;
603 } val;
604 struct kvm_one_reg reg = {
605 .id = id,
606 .addr = (uintptr_t) &val,
608 int ret;
610 switch (id & KVM_REG_SIZE_MASK) {
611 case KVM_REG_SIZE_U32:
612 val.u32 = env->spr[spr];
613 break;
615 case KVM_REG_SIZE_U64:
616 val.u64 = env->spr[spr];
617 break;
619 default:
620 /* Don't handle this size yet */
621 abort();
624 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
625 if (ret != 0) {
626 trace_kvm_failed_spr_set(spr, strerror(errno));
630 static int kvm_put_fp(CPUState *cs)
632 PowerPCCPU *cpu = POWERPC_CPU(cs);
633 CPUPPCState *env = &cpu->env;
634 struct kvm_one_reg reg;
635 int i;
636 int ret;
638 if (env->insns_flags & PPC_FLOAT) {
639 uint64_t fpscr = env->fpscr;
640 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
642 reg.id = KVM_REG_PPC_FPSCR;
643 reg.addr = (uintptr_t)&fpscr;
644 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
645 if (ret < 0) {
646 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
647 return ret;
650 for (i = 0; i < 32; i++) {
651 uint64_t vsr[2];
653 vsr[0] = float64_val(env->fpr[i]);
654 vsr[1] = env->vsr[i];
655 reg.addr = (uintptr_t) &vsr;
656 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
658 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
659 if (ret < 0) {
660 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
661 i, strerror(errno));
662 return ret;
667 if (env->insns_flags & PPC_ALTIVEC) {
668 reg.id = KVM_REG_PPC_VSCR;
669 reg.addr = (uintptr_t)&env->vscr;
670 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
671 if (ret < 0) {
672 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
673 return ret;
676 for (i = 0; i < 32; i++) {
677 reg.id = KVM_REG_PPC_VR(i);
678 reg.addr = (uintptr_t)&env->avr[i];
679 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
680 if (ret < 0) {
681 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
682 return ret;
687 return 0;
690 static int kvm_get_fp(CPUState *cs)
692 PowerPCCPU *cpu = POWERPC_CPU(cs);
693 CPUPPCState *env = &cpu->env;
694 struct kvm_one_reg reg;
695 int i;
696 int ret;
698 if (env->insns_flags & PPC_FLOAT) {
699 uint64_t fpscr;
700 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
702 reg.id = KVM_REG_PPC_FPSCR;
703 reg.addr = (uintptr_t)&fpscr;
704 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
705 if (ret < 0) {
706 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
707 return ret;
708 } else {
709 env->fpscr = fpscr;
712 for (i = 0; i < 32; i++) {
713 uint64_t vsr[2];
715 reg.addr = (uintptr_t) &vsr;
716 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
718 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
719 if (ret < 0) {
720 DPRINTF("Unable to get %s%d from KVM: %s\n",
721 vsx ? "VSR" : "FPR", i, strerror(errno));
722 return ret;
723 } else {
724 env->fpr[i] = vsr[0];
725 if (vsx) {
726 env->vsr[i] = vsr[1];
732 if (env->insns_flags & PPC_ALTIVEC) {
733 reg.id = KVM_REG_PPC_VSCR;
734 reg.addr = (uintptr_t)&env->vscr;
735 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
736 if (ret < 0) {
737 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
738 return ret;
741 for (i = 0; i < 32; i++) {
742 reg.id = KVM_REG_PPC_VR(i);
743 reg.addr = (uintptr_t)&env->avr[i];
744 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
745 if (ret < 0) {
746 DPRINTF("Unable to get VR%d from KVM: %s\n",
747 i, strerror(errno));
748 return ret;
753 return 0;
756 #if defined(TARGET_PPC64)
757 static int kvm_get_vpa(CPUState *cs)
759 PowerPCCPU *cpu = POWERPC_CPU(cs);
760 CPUPPCState *env = &cpu->env;
761 struct kvm_one_reg reg;
762 int ret;
764 reg.id = KVM_REG_PPC_VPA_ADDR;
765 reg.addr = (uintptr_t)&env->vpa_addr;
766 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
767 if (ret < 0) {
768 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
769 return ret;
772 assert((uintptr_t)&env->slb_shadow_size
773 == ((uintptr_t)&env->slb_shadow_addr + 8));
774 reg.id = KVM_REG_PPC_VPA_SLB;
775 reg.addr = (uintptr_t)&env->slb_shadow_addr;
776 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
777 if (ret < 0) {
778 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
779 strerror(errno));
780 return ret;
783 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
784 reg.id = KVM_REG_PPC_VPA_DTL;
785 reg.addr = (uintptr_t)&env->dtl_addr;
786 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
787 if (ret < 0) {
788 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
789 strerror(errno));
790 return ret;
793 return 0;
796 static int kvm_put_vpa(CPUState *cs)
798 PowerPCCPU *cpu = POWERPC_CPU(cs);
799 CPUPPCState *env = &cpu->env;
800 struct kvm_one_reg reg;
801 int ret;
803 /* SLB shadow or DTL can't be registered unless a master VPA is
804 * registered. That means when restoring state, if a VPA *is*
805 * registered, we need to set that up first. If not, we need to
806 * deregister the others before deregistering the master VPA */
807 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
809 if (env->vpa_addr) {
810 reg.id = KVM_REG_PPC_VPA_ADDR;
811 reg.addr = (uintptr_t)&env->vpa_addr;
812 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
813 if (ret < 0) {
814 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
815 return ret;
819 assert((uintptr_t)&env->slb_shadow_size
820 == ((uintptr_t)&env->slb_shadow_addr + 8));
821 reg.id = KVM_REG_PPC_VPA_SLB;
822 reg.addr = (uintptr_t)&env->slb_shadow_addr;
823 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
824 if (ret < 0) {
825 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
826 return ret;
829 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
830 reg.id = KVM_REG_PPC_VPA_DTL;
831 reg.addr = (uintptr_t)&env->dtl_addr;
832 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
833 if (ret < 0) {
834 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
835 strerror(errno));
836 return ret;
839 if (!env->vpa_addr) {
840 reg.id = KVM_REG_PPC_VPA_ADDR;
841 reg.addr = (uintptr_t)&env->vpa_addr;
842 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
843 if (ret < 0) {
844 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
845 return ret;
849 return 0;
851 #endif /* TARGET_PPC64 */
853 int kvm_arch_put_registers(CPUState *cs, int level)
855 PowerPCCPU *cpu = POWERPC_CPU(cs);
856 CPUPPCState *env = &cpu->env;
857 struct kvm_regs regs;
858 int ret;
859 int i;
861 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
862 if (ret < 0) {
863 return ret;
866 regs.ctr = env->ctr;
867 regs.lr = env->lr;
868 regs.xer = cpu_read_xer(env);
869 regs.msr = env->msr;
870 regs.pc = env->nip;
872 regs.srr0 = env->spr[SPR_SRR0];
873 regs.srr1 = env->spr[SPR_SRR1];
875 regs.sprg0 = env->spr[SPR_SPRG0];
876 regs.sprg1 = env->spr[SPR_SPRG1];
877 regs.sprg2 = env->spr[SPR_SPRG2];
878 regs.sprg3 = env->spr[SPR_SPRG3];
879 regs.sprg4 = env->spr[SPR_SPRG4];
880 regs.sprg5 = env->spr[SPR_SPRG5];
881 regs.sprg6 = env->spr[SPR_SPRG6];
882 regs.sprg7 = env->spr[SPR_SPRG7];
884 regs.pid = env->spr[SPR_BOOKE_PID];
886 for (i = 0;i < 32; i++)
887 regs.gpr[i] = env->gpr[i];
889 regs.cr = 0;
890 for (i = 0; i < 8; i++) {
891 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
894 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
895 if (ret < 0)
896 return ret;
898 kvm_put_fp(cs);
900 if (env->tlb_dirty) {
901 kvm_sw_tlb_put(cpu);
902 env->tlb_dirty = false;
905 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
906 struct kvm_sregs sregs;
908 sregs.pvr = env->spr[SPR_PVR];
910 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
912 /* Sync SLB */
913 #ifdef TARGET_PPC64
914 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
915 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
916 if (env->slb[i].esid & SLB_ESID_V) {
917 sregs.u.s.ppc64.slb[i].slbe |= i;
919 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
921 #endif
923 /* Sync SRs */
924 for (i = 0; i < 16; i++) {
925 sregs.u.s.ppc32.sr[i] = env->sr[i];
928 /* Sync BATs */
929 for (i = 0; i < 8; i++) {
930 /* Beware. We have to swap upper and lower bits here */
931 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
932 | env->DBAT[1][i];
933 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
934 | env->IBAT[1][i];
937 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
938 if (ret) {
939 return ret;
943 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
944 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
947 if (cap_one_reg) {
948 int i;
950 /* We deliberately ignore errors here, for kernels which have
951 * the ONE_REG calls, but don't support the specific
952 * registers, there's a reasonable chance things will still
953 * work, at least until we try to migrate. */
954 for (i = 0; i < 1024; i++) {
955 uint64_t id = env->spr_cb[i].one_reg_id;
957 if (id != 0) {
958 kvm_put_one_spr(cs, id, i);
962 #ifdef TARGET_PPC64
963 if (msr_ts) {
964 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
965 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
967 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
968 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
970 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
971 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
972 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
973 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
974 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
975 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
976 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
977 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
978 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
979 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
982 if (cap_papr) {
983 if (kvm_put_vpa(cs) < 0) {
984 DPRINTF("Warning: Unable to set VPA information to KVM\n");
988 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
989 #endif /* TARGET_PPC64 */
992 return ret;
995 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
997 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1000 int kvm_arch_get_registers(CPUState *cs)
1002 PowerPCCPU *cpu = POWERPC_CPU(cs);
1003 CPUPPCState *env = &cpu->env;
1004 struct kvm_regs regs;
1005 struct kvm_sregs sregs;
1006 uint32_t cr;
1007 int i, ret;
1009 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1010 if (ret < 0)
1011 return ret;
1013 cr = regs.cr;
1014 for (i = 7; i >= 0; i--) {
1015 env->crf[i] = cr & 15;
1016 cr >>= 4;
1019 env->ctr = regs.ctr;
1020 env->lr = regs.lr;
1021 cpu_write_xer(env, regs.xer);
1022 env->msr = regs.msr;
1023 env->nip = regs.pc;
1025 env->spr[SPR_SRR0] = regs.srr0;
1026 env->spr[SPR_SRR1] = regs.srr1;
1028 env->spr[SPR_SPRG0] = regs.sprg0;
1029 env->spr[SPR_SPRG1] = regs.sprg1;
1030 env->spr[SPR_SPRG2] = regs.sprg2;
1031 env->spr[SPR_SPRG3] = regs.sprg3;
1032 env->spr[SPR_SPRG4] = regs.sprg4;
1033 env->spr[SPR_SPRG5] = regs.sprg5;
1034 env->spr[SPR_SPRG6] = regs.sprg6;
1035 env->spr[SPR_SPRG7] = regs.sprg7;
1037 env->spr[SPR_BOOKE_PID] = regs.pid;
1039 for (i = 0;i < 32; i++)
1040 env->gpr[i] = regs.gpr[i];
1042 kvm_get_fp(cs);
1044 if (cap_booke_sregs) {
1045 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1046 if (ret < 0) {
1047 return ret;
1050 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1051 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1052 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1053 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1054 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1055 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1056 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1057 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1058 env->spr[SPR_DECR] = sregs.u.e.dec;
1059 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1060 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1061 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1064 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1065 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1066 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1067 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1068 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1069 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1072 if (sregs.u.e.features & KVM_SREGS_E_64) {
1073 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1076 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1077 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1080 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1081 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1082 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1083 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1084 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1085 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1086 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1087 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1088 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1089 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1090 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1091 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1092 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1093 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1094 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1095 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1096 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1097 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1098 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1099 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1100 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1101 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1102 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1103 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1104 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1105 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1106 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1107 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1108 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1109 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1110 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1111 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1112 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1114 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1115 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1116 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1117 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1118 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1119 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1120 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1123 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1124 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1125 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1128 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1129 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1130 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1131 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1132 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1136 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1137 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1138 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1139 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1140 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1141 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1142 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1143 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1144 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1145 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1146 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1149 if (sregs.u.e.features & KVM_SREGS_EXP) {
1150 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1153 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1154 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1155 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1158 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1159 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1160 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1161 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1163 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1164 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1165 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1170 if (cap_segstate) {
1171 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1172 if (ret < 0) {
1173 return ret;
1176 if (!env->external_htab) {
1177 ppc_store_sdr1(env, sregs.u.s.sdr1);
1180 /* Sync SLB */
1181 #ifdef TARGET_PPC64
1183 * The packed SLB array we get from KVM_GET_SREGS only contains
1184 * information about valid entries. So we flush our internal
1185 * copy to get rid of stale ones, then put all valid SLB entries
1186 * back in.
1188 memset(env->slb, 0, sizeof(env->slb));
1189 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1190 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1191 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1193 * Only restore valid entries
1195 if (rb & SLB_ESID_V) {
1196 ppc_store_slb(env, rb, rs);
1199 #endif
1201 /* Sync SRs */
1202 for (i = 0; i < 16; i++) {
1203 env->sr[i] = sregs.u.s.ppc32.sr[i];
1206 /* Sync BATs */
1207 for (i = 0; i < 8; i++) {
1208 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1209 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1210 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1211 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1215 if (cap_hior) {
1216 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1219 if (cap_one_reg) {
1220 int i;
1222 /* We deliberately ignore errors here, for kernels which have
1223 * the ONE_REG calls, but don't support the specific
1224 * registers, there's a reasonable chance things will still
1225 * work, at least until we try to migrate. */
1226 for (i = 0; i < 1024; i++) {
1227 uint64_t id = env->spr_cb[i].one_reg_id;
1229 if (id != 0) {
1230 kvm_get_one_spr(cs, id, i);
1234 #ifdef TARGET_PPC64
1235 if (msr_ts) {
1236 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1237 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1239 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1240 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1242 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1243 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1244 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1245 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1246 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1247 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1248 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1249 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1250 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1251 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1254 if (cap_papr) {
1255 if (kvm_get_vpa(cs) < 0) {
1256 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1260 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1261 #endif
1264 return 0;
1267 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1269 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1271 if (irq != PPC_INTERRUPT_EXT) {
1272 return 0;
1275 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1276 return 0;
1279 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1281 return 0;
1284 #if defined(TARGET_PPCEMB)
1285 #define PPC_INPUT_INT PPC40x_INPUT_INT
1286 #elif defined(TARGET_PPC64)
1287 #define PPC_INPUT_INT PPC970_INPUT_INT
1288 #else
1289 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1290 #endif
1292 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1294 PowerPCCPU *cpu = POWERPC_CPU(cs);
1295 CPUPPCState *env = &cpu->env;
1296 int r;
1297 unsigned irq;
1299 qemu_mutex_lock_iothread();
1301 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1302 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1303 if (!cap_interrupt_level &&
1304 run->ready_for_interrupt_injection &&
1305 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1306 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1308 /* For now KVM disregards the 'irq' argument. However, in the
1309 * future KVM could cache it in-kernel to avoid a heavyweight exit
1310 * when reading the UIC.
1312 irq = KVM_INTERRUPT_SET;
1314 DPRINTF("injected interrupt %d\n", irq);
1315 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1316 if (r < 0) {
1317 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1320 /* Always wake up soon in case the interrupt was level based */
1321 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1322 (get_ticks_per_sec() / 50));
1325 /* We don't know if there are more interrupts pending after this. However,
1326 * the guest will return to userspace in the course of handling this one
1327 * anyways, so we will get a chance to deliver the rest. */
1329 qemu_mutex_unlock_iothread();
1332 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1334 return MEMTXATTRS_UNSPECIFIED;
1337 int kvm_arch_process_async_events(CPUState *cs)
1339 return cs->halted;
1342 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1344 CPUState *cs = CPU(cpu);
1345 CPUPPCState *env = &cpu->env;
1347 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1348 cs->halted = 1;
1349 cs->exception_index = EXCP_HLT;
1352 return 0;
1355 /* map dcr access to existing qemu dcr emulation */
1356 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1358 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1359 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1361 return 0;
1364 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1366 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1367 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1369 return 0;
1372 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1374 /* Mixed endian case is not handled */
1375 uint32_t sc = debug_inst_opcode;
1377 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1378 sizeof(sc), 0) ||
1379 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1380 return -EINVAL;
1383 return 0;
1386 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1388 uint32_t sc;
1390 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1391 sc != debug_inst_opcode ||
1392 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1393 sizeof(sc), 1)) {
1394 return -EINVAL;
1397 return 0;
1400 static int find_hw_breakpoint(target_ulong addr, int type)
1402 int n;
1404 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1405 <= ARRAY_SIZE(hw_debug_points));
1407 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1408 if (hw_debug_points[n].addr == addr &&
1409 hw_debug_points[n].type == type) {
1410 return n;
1414 return -1;
1417 static int find_hw_watchpoint(target_ulong addr, int *flag)
1419 int n;
1421 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1422 if (n >= 0) {
1423 *flag = BP_MEM_ACCESS;
1424 return n;
1427 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1428 if (n >= 0) {
1429 *flag = BP_MEM_WRITE;
1430 return n;
1433 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1434 if (n >= 0) {
1435 *flag = BP_MEM_READ;
1436 return n;
1439 return -1;
1442 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1443 target_ulong len, int type)
1445 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1446 return -ENOBUFS;
1449 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1450 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1452 switch (type) {
1453 case GDB_BREAKPOINT_HW:
1454 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1455 return -ENOBUFS;
1458 if (find_hw_breakpoint(addr, type) >= 0) {
1459 return -EEXIST;
1462 nb_hw_breakpoint++;
1463 break;
1465 case GDB_WATCHPOINT_WRITE:
1466 case GDB_WATCHPOINT_READ:
1467 case GDB_WATCHPOINT_ACCESS:
1468 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1469 return -ENOBUFS;
1472 if (find_hw_breakpoint(addr, type) >= 0) {
1473 return -EEXIST;
1476 nb_hw_watchpoint++;
1477 break;
1479 default:
1480 return -ENOSYS;
1483 return 0;
1486 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1487 target_ulong len, int type)
1489 int n;
1491 n = find_hw_breakpoint(addr, type);
1492 if (n < 0) {
1493 return -ENOENT;
1496 switch (type) {
1497 case GDB_BREAKPOINT_HW:
1498 nb_hw_breakpoint--;
1499 break;
1501 case GDB_WATCHPOINT_WRITE:
1502 case GDB_WATCHPOINT_READ:
1503 case GDB_WATCHPOINT_ACCESS:
1504 nb_hw_watchpoint--;
1505 break;
1507 default:
1508 return -ENOSYS;
1510 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1512 return 0;
1515 void kvm_arch_remove_all_hw_breakpoints(void)
1517 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1520 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1522 int n;
1524 /* Software Breakpoint updates */
1525 if (kvm_sw_breakpoints_active(cs)) {
1526 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1529 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1530 <= ARRAY_SIZE(hw_debug_points));
1531 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1533 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1534 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1535 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1536 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1537 switch (hw_debug_points[n].type) {
1538 case GDB_BREAKPOINT_HW:
1539 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1540 break;
1541 case GDB_WATCHPOINT_WRITE:
1542 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1543 break;
1544 case GDB_WATCHPOINT_READ:
1545 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1546 break;
1547 case GDB_WATCHPOINT_ACCESS:
1548 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1549 KVMPPC_DEBUG_WATCH_READ;
1550 break;
1551 default:
1552 cpu_abort(cs, "Unsupported breakpoint type\n");
1554 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1559 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1561 CPUState *cs = CPU(cpu);
1562 CPUPPCState *env = &cpu->env;
1563 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1564 int handle = 0;
1565 int n;
1566 int flag = 0;
1568 if (cs->singlestep_enabled) {
1569 handle = 1;
1570 } else if (arch_info->status) {
1571 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1572 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1573 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1574 if (n >= 0) {
1575 handle = 1;
1577 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1578 KVMPPC_DEBUG_WATCH_WRITE)) {
1579 n = find_hw_watchpoint(arch_info->address, &flag);
1580 if (n >= 0) {
1581 handle = 1;
1582 cs->watchpoint_hit = &hw_watchpoint;
1583 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1584 hw_watchpoint.flags = flag;
1588 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1589 handle = 1;
1590 } else {
1591 /* QEMU is not able to handle debug exception, so inject
1592 * program exception to guest;
1593 * Yes program exception NOT debug exception !!
1594 * When QEMU is using debug resources then debug exception must
1595 * be always set. To achieve this we set MSR_DE and also set
1596 * MSRP_DEP so guest cannot change MSR_DE.
1597 * When emulating debug resource for guest we want guest
1598 * to control MSR_DE (enable/disable debug interrupt on need).
1599 * Supporting both configurations are NOT possible.
1600 * So the result is that we cannot share debug resources
1601 * between QEMU and Guest on BOOKE architecture.
1602 * In the current design QEMU gets the priority over guest,
1603 * this means that if QEMU is using debug resources then guest
1604 * cannot use them;
1605 * For software breakpoint QEMU uses a privileged instruction;
1606 * So there cannot be any reason that we are here for guest
1607 * set debug exception, only possibility is guest executed a
1608 * privileged / illegal instruction and that's why we are
1609 * injecting a program interrupt.
1612 cpu_synchronize_state(cs);
1613 /* env->nip is PC, so increment this by 4 to use
1614 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1616 env->nip += 4;
1617 cs->exception_index = POWERPC_EXCP_PROGRAM;
1618 env->error_code = POWERPC_EXCP_INVAL;
1619 ppc_cpu_do_interrupt(cs);
1622 return handle;
1625 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1627 PowerPCCPU *cpu = POWERPC_CPU(cs);
1628 CPUPPCState *env = &cpu->env;
1629 int ret;
1631 qemu_mutex_lock_iothread();
1633 switch (run->exit_reason) {
1634 case KVM_EXIT_DCR:
1635 if (run->dcr.is_write) {
1636 DPRINTF("handle dcr write\n");
1637 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1638 } else {
1639 DPRINTF("handle dcr read\n");
1640 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1642 break;
1643 case KVM_EXIT_HLT:
1644 DPRINTF("handle halt\n");
1645 ret = kvmppc_handle_halt(cpu);
1646 break;
1647 #if defined(TARGET_PPC64)
1648 case KVM_EXIT_PAPR_HCALL:
1649 DPRINTF("handle PAPR hypercall\n");
1650 run->papr_hcall.ret = spapr_hypercall(cpu,
1651 run->papr_hcall.nr,
1652 run->papr_hcall.args);
1653 ret = 0;
1654 break;
1655 #endif
1656 case KVM_EXIT_EPR:
1657 DPRINTF("handle epr\n");
1658 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1659 ret = 0;
1660 break;
1661 case KVM_EXIT_WATCHDOG:
1662 DPRINTF("handle watchdog expiry\n");
1663 watchdog_perform_action();
1664 ret = 0;
1665 break;
1667 case KVM_EXIT_DEBUG:
1668 DPRINTF("handle debug exception\n");
1669 if (kvm_handle_debug(cpu, run)) {
1670 ret = EXCP_DEBUG;
1671 break;
1673 /* re-enter, this exception was guest-internal */
1674 ret = 0;
1675 break;
1677 default:
1678 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1679 ret = -1;
1680 break;
1683 qemu_mutex_unlock_iothread();
1684 return ret;
1687 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1689 CPUState *cs = CPU(cpu);
1690 uint32_t bits = tsr_bits;
1691 struct kvm_one_reg reg = {
1692 .id = KVM_REG_PPC_OR_TSR,
1693 .addr = (uintptr_t) &bits,
1696 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1699 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1702 CPUState *cs = CPU(cpu);
1703 uint32_t bits = tsr_bits;
1704 struct kvm_one_reg reg = {
1705 .id = KVM_REG_PPC_CLEAR_TSR,
1706 .addr = (uintptr_t) &bits,
1709 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1712 int kvmppc_set_tcr(PowerPCCPU *cpu)
1714 CPUState *cs = CPU(cpu);
1715 CPUPPCState *env = &cpu->env;
1716 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1718 struct kvm_one_reg reg = {
1719 .id = KVM_REG_PPC_TCR,
1720 .addr = (uintptr_t) &tcr,
1723 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1726 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1728 CPUState *cs = CPU(cpu);
1729 int ret;
1731 if (!kvm_enabled()) {
1732 return -1;
1735 if (!cap_ppc_watchdog) {
1736 printf("warning: KVM does not support watchdog");
1737 return -1;
1740 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1741 if (ret < 0) {
1742 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1743 __func__, strerror(-ret));
1744 return ret;
1747 return ret;
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line, truncated to @len bytes) into @value.
 * Returns 0 when such a line was found, -1 otherwise.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* An empty string ends the scan */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
/*
 * Return the number that follows the ':' on the "timebase" line of
 * /proc/cpuinfo.  Falls back to get_ticks_per_sec() when that line is
 * missing or contains no ':'.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line)) != 0) {
        return fallback;
    }

    colon = strchr(line, ':');
    if (colon == NULL) {
        return fallback;
    }

    return atoi(colon + 1);
}
/*
 * Read /proc/device-tree/system-id into a newly allocated *value.
 * Returns the success indication of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    bool ok = g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                                  NULL);

    return ok;
}
/*
 * Read /proc/device-tree/model into a newly allocated *value.
 * Returns the success indication of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    bool ok = g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);

    return ok;
}
1808 /* Try to find a device tree node for a CPU with clock-frequency property */
1809 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1811 struct dirent *dirp;
1812 DIR *dp;
1814 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1815 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1816 return -1;
1819 buf[0] = '\0';
1820 while ((dirp = readdir(dp)) != NULL) {
1821 FILE *f;
1822 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1823 dirp->d_name);
1824 f = fopen(buf, "r");
1825 if (f) {
1826 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1827 fclose(f);
1828 break;
1830 buf[0] = '\0';
1832 closedir(dp);
1833 if (buf[0] == '\0') {
1834 printf("Unknown host!\n");
1835 return -1;
1838 return 0;
/*
 * Read a big-endian integer property from the device tree file @filename.
 *
 * Returns the value for a 4-byte or 8-byte file, (uint64_t)-1 when the
 * file cannot be opened, and 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    int len;
    FILE *f = fopen(filename, "rb");

    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);

    if (len == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    }
    if (len == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(u.v64);
    }

    return 0;
}
1868 /* Read a CPU node property from the host device tree that's a single
1869 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1870 * (can't find or open the property, or doesn't understand the
1871 * format) */
1872 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1874 char buf[PATH_MAX], *tmp;
1875 uint64_t val;
1877 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1878 return -1;
1881 tmp = g_strdup_printf("%s/%s", buf, propname);
1882 val = kvmppc_read_int_dt(tmp);
1883 g_free(tmp);
1885 return val;
/* Host CPU "clock-frequency" device tree property */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Host CPU "ibm,vmx" device tree property, narrowed to 32 bits */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,vmx");

    return (uint32_t)raw;
}
/* Host CPU "ibm,dfp" device tree property, narrowed to 32 bits */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,dfp");

    return (uint32_t)raw;
}
1903 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1905 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1906 CPUState *cs = CPU(cpu);
1908 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1909 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1910 return 0;
1913 return 1;
1916 int kvmppc_get_hasidle(CPUPPCState *env)
1918 struct kvm_ppc_pvinfo pvinfo;
1920 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1921 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1922 return 1;
1925 return 0;
1928 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1930 uint32_t *hc = (uint32_t*)buf;
1931 struct kvm_ppc_pvinfo pvinfo;
1933 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1934 memcpy(buf, pvinfo.hcall, buf_len);
1935 return 0;
1939 * Fallback to always fail hypercalls regardless of endianness:
1941 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1942 * li r3, -1
1943 * b .+8 (becomes nop in wrong endian)
1944 * bswap32(li r3, -1)
1947 hc[0] = cpu_to_be32(0x08000048);
1948 hc[1] = cpu_to_be32(0x3860ffff);
1949 hc[2] = cpu_to_be32(0x48000008);
1950 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1952 return 0;
1955 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1957 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1960 void kvmppc_enable_logical_ci_hcalls(void)
1963 * FIXME: it would be nice if we could detect the cases where
1964 * we're using a device which requires the in kernel
1965 * implementation of these hcalls, but the kernel lacks them and
1966 * produce a warning.
1968 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1969 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1972 void kvmppc_enable_set_mode_hcall(void)
1974 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1977 void kvmppc_set_papr(PowerPCCPU *cpu)
1979 CPUState *cs = CPU(cpu);
1980 int ret;
1982 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1983 if (ret) {
1984 cpu_abort(cs, "This KVM version does not support PAPR\n");
1987 /* Update the capability flag so we sync the right information
1988 * with kvm */
1989 cap_papr = 1;
1992 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1994 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1997 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1999 CPUState *cs = CPU(cpu);
2000 int ret;
2002 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2003 if (ret && mpic_proxy) {
2004 cpu_abort(cs, "This KVM version does not support EPR\n");
2008 int kvmppc_smt_threads(void)
2010 return cap_ppc_smt ? cap_ppc_smt : 1;
2013 #ifdef TARGET_PPC64
2014 off_t kvmppc_alloc_rma(void **rma)
2016 off_t size;
2017 int fd;
2018 struct kvm_allocate_rma ret;
2020 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2021 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2022 * not necessary on this hardware
2023 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2025 * FIXME: We should allow the user to force contiguous RMA
2026 * allocation in the cap_ppc_rma==1 case.
2028 if (cap_ppc_rma < 2) {
2029 return 0;
2032 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2033 if (fd < 0) {
2034 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2035 strerror(errno));
2036 return -1;
2039 size = MIN(ret.rma_size, 256ul << 20);
2041 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2042 if (*rma == MAP_FAILED) {
2043 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2044 return -1;
2047 return size;
2050 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2052 struct kvm_ppc_smmu_info info;
2053 long rampagesize, best_page_shift;
2054 int i;
2056 if (cap_ppc_rma >= 2) {
2057 return current_size;
2060 /* Find the largest hardware supported page size that's less than
2061 * or equal to the (logical) backing page size of guest RAM */
2062 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2063 rampagesize = getrampagesize();
2064 best_page_shift = 0;
2066 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2067 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2069 if (!sps->page_shift) {
2070 continue;
2073 if ((sps->page_shift > best_page_shift)
2074 && ((1UL << sps->page_shift) <= rampagesize)) {
2075 best_page_shift = sps->page_shift;
2079 return MIN(current_size,
2080 1ULL << (best_page_shift + hash_shift - 7));
2082 #endif
2084 bool kvmppc_spapr_use_multitce(void)
2086 return cap_spapr_multitce;
2089 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2090 bool need_vfio)
2092 struct kvm_create_spapr_tce args = {
2093 .liobn = liobn,
2094 .window_size = window_size,
2096 long len;
2097 int fd;
2098 void *table;
2100 /* Must set fd to -1 so we don't try to munmap when called for
2101 * destroying the table, which the upper layers -will- do
2103 *pfd = -1;
2104 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2105 return NULL;
2108 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2109 if (fd < 0) {
2110 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2111 liobn);
2112 return NULL;
2115 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2116 /* FIXME: round this up to page size */
2118 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2119 if (table == MAP_FAILED) {
2120 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2121 liobn);
2122 close(fd);
2123 return NULL;
2126 *pfd = fd;
2127 return table;
/*
 * Tear down a TCE table mapped from a kernel-provided file descriptor.
 *
 * Returns -1 when @fd is negative (nothing to release), 0 otherwise.
 * Errors from munmap() and close() are reported but not propagated; both
 * are always attempted so that a failed unmap does not also leak @fd.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);

    if (munmap(table, len) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    if (close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
    }

    return 0;
}
2149 int kvmppc_reset_htab(int shift_hint)
2151 uint32_t shift = shift_hint;
2153 if (!kvm_enabled()) {
2154 /* Full emulation, tell caller to allocate htab itself */
2155 return 0;
2157 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2158 int ret;
2159 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2160 if (ret == -ENOTTY) {
2161 /* At least some versions of PR KVM advertise the
2162 * capability, but don't implement the ioctl(). Oops.
2163 * Return 0 so that we allocate the htab in qemu, as is
2164 * correct for PR. */
2165 return 0;
2166 } else if (ret < 0) {
2167 return ret;
2169 return shift;
2172 /* We have a kernel that predates the htab reset calls. For PR
2173 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2174 * this era, it has allocated a 16MB fixed size hash table
2175 * already. Kernels of this era have the GET_PVINFO capability
2176 * only on PR, so we use this hack to determine the right
2177 * answer */
2178 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2179 /* PR - tell caller to allocate htab */
2180 return 0;
2181 } else {
2182 /* HV - assume 16MB kernel allocated htab */
2183 return 24;
2187 static inline uint32_t mfpvr(void)
2189 uint32_t pvr;
2191 asm ("mfpvr %0"
2192 : "=r"(pvr));
2193 return pvr;
/* Set (@on true) or clear (@on false) the bits of @flags in *@word */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2205 static void kvmppc_host_cpu_initfn(Object *obj)
2207 assert(kvm_enabled());
2210 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2212 DeviceClass *dc = DEVICE_CLASS(oc);
2213 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2214 uint32_t vmx = kvmppc_get_vmx();
2215 uint32_t dfp = kvmppc_get_dfp();
2216 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2217 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2219 /* Now fix up the class with information we can query from the host */
2220 pcc->pvr = mfpvr();
2222 if (vmx != -1) {
2223 /* Only override when we know what the host supports */
2224 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2225 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2227 if (dfp != -1) {
2228 /* Only override when we know what the host supports */
2229 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2232 if (dcache_size != -1) {
2233 pcc->l1_dcache_size = dcache_size;
2236 if (icache_size != -1) {
2237 pcc->l1_icache_size = icache_size;
2240 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2241 dc->cannot_destroy_with_object_finalize_yet = true;
2244 bool kvmppc_has_cap_epr(void)
2246 return cap_epr;
2249 bool kvmppc_has_cap_htab_fd(void)
2251 return cap_htab_fd;
2254 bool kvmppc_has_cap_fixup_hcalls(void)
2256 return cap_fixup_hcalls;
2259 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2261 ObjectClass *oc = OBJECT_CLASS(pcc);
2263 while (oc && !object_class_is_abstract(oc)) {
2264 oc = object_class_get_parent(oc);
2266 assert(oc);
2268 return POWERPC_CPU_CLASS(oc);
2271 static int kvm_ppc_register_host_cpu_type(void)
2273 TypeInfo type_info = {
2274 .name = TYPE_HOST_POWERPC_CPU,
2275 .instance_init = kvmppc_host_cpu_initfn,
2276 .class_init = kvmppc_host_cpu_class_init,
2278 uint32_t host_pvr = mfpvr();
2279 PowerPCCPUClass *pvr_pcc;
2280 DeviceClass *dc;
2282 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2283 if (pvr_pcc == NULL) {
2284 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2286 if (pvr_pcc == NULL) {
2287 return -1;
2289 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2290 type_register(&type_info);
2292 /* Register generic family CPU class for a family */
2293 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2294 dc = DEVICE_CLASS(pvr_pcc);
2295 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2296 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2297 type_register(&type_info);
2299 return 0;
2302 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2304 struct kvm_rtas_token_args args = {
2305 .token = token,
2308 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2309 return -ENOENT;
2312 strncpy(args.name, function, sizeof(args.name));
2314 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2317 int kvmppc_get_htab_fd(bool write)
2319 struct kvm_get_htab_fd s = {
2320 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2321 .start_index = 0,
2324 if (!cap_htab_fd) {
2325 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2326 return -1;
2329 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2332 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2334 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2335 uint8_t buf[bufsize];
2336 ssize_t rc;
2338 do {
2339 rc = read(fd, buf, bufsize);
2340 if (rc < 0) {
2341 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2342 strerror(errno));
2343 return rc;
2344 } else if (rc) {
2345 uint8_t *buffer = buf;
2346 ssize_t n = rc;
2347 while (n) {
2348 struct kvm_get_htab_header *head =
2349 (struct kvm_get_htab_header *) buffer;
2350 size_t chunksize = sizeof(*head) +
2351 HASH_PTE_SIZE_64 * head->n_valid;
2353 qemu_put_be32(f, head->index);
2354 qemu_put_be16(f, head->n_valid);
2355 qemu_put_be16(f, head->n_invalid);
2356 qemu_put_buffer(f, (void *)(head + 1),
2357 HASH_PTE_SIZE_64 * head->n_valid);
2359 buffer += chunksize;
2360 n -= chunksize;
2363 } while ((rc != 0)
2364 && ((max_ns < 0)
2365 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2367 return (rc == 0) ? 1 : 0;
2370 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2371 uint16_t n_valid, uint16_t n_invalid)
2373 struct kvm_get_htab_header *buf;
2374 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2375 ssize_t rc;
2377 buf = alloca(chunksize);
2378 buf->index = index;
2379 buf->n_valid = n_valid;
2380 buf->n_invalid = n_invalid;
2382 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2384 rc = write(fd, buf, chunksize);
2385 if (rc < 0) {
2386 fprintf(stderr, "Error writing KVM hash table: %s\n",
2387 strerror(errno));
2388 return rc;
2390 if (rc != chunksize) {
2391 /* We should never get a short write on a single chunk */
2392 fprintf(stderr, "Short write, restoring KVM hash table\n");
2393 return -1;
2395 return 0;
2398 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2400 return true;
2403 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2405 return 1;
/* No SIGBUS handling is done here; always returns 1 */
int kvm_arch_on_sigbus(int code, void *addr)
{
    const int result = 1;

    return result;
}
2413 void kvm_arch_init_irq_routing(KVMState *s)
2417 struct kvm_get_htab_buf {
2418 struct kvm_get_htab_header header;
2420 * We require one extra byte for read
2422 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2425 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2427 int htab_fd;
2428 struct kvm_get_htab_fd ghf;
2429 struct kvm_get_htab_buf *hpte_buf;
2431 ghf.flags = 0;
2432 ghf.start_index = pte_index;
2433 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2434 if (htab_fd < 0) {
2435 goto error_out;
2438 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2440 * Read the hpte group
2442 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2443 goto out_close;
2446 close(htab_fd);
2447 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2449 out_close:
2450 g_free(hpte_buf);
2451 close(htab_fd);
2452 error_out:
2453 return 0;
2456 void kvmppc_hash64_free_pteg(uint64_t token)
2458 struct kvm_get_htab_buf *htab_buf;
2460 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2461 hpte);
2462 g_free(htab_buf);
2463 return;
2466 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2467 target_ulong pte0, target_ulong pte1)
2469 int htab_fd;
2470 struct kvm_get_htab_fd ghf;
2471 struct kvm_get_htab_buf hpte_buf;
2473 ghf.flags = 0;
2474 ghf.start_index = 0; /* Ignored */
2475 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2476 if (htab_fd < 0) {
2477 goto error_out;
2480 hpte_buf.header.n_valid = 1;
2481 hpte_buf.header.n_invalid = 0;
2482 hpte_buf.header.index = pte_index;
2483 hpte_buf.hpte[0] = pte0;
2484 hpte_buf.hpte[1] = pte1;
2486 * Write the hpte entry.
2487 * CAUTION: write() has the warn_unused_result attribute. Hence we
2488 * need to check the return value, even though we do nothing.
2490 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2491 goto out_close;
2494 out_close:
2495 close(htab_fd);
2496 return;
2498 error_out:
2499 return;
2502 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2503 uint64_t address, uint32_t data, PCIDevice *dev)
2505 return 0;
/* The GSI is the low 16 bits of the MSI data word */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t gsi_mask = 0xffff;
    uint32_t gsi = data & gsi_mask;

    return gsi;
}
2513 int kvmppc_enable_hwrng(void)
2515 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2516 return -1;
2519 return kvmppc_enable_hcall(kvm_state, H_RANDOM);