Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
[qemu/ar7.git] / target-ppc / kvm.c
bloba18d4d5654b7ee81183d34b1b7579dd6f63faec3
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
49 #endif
51 //#define DEBUG_KVM
53 #ifdef DEBUG_KVM
54 #define DPRINTF(fmt, ...) \
55 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
56 #else
57 #define DPRINTF(fmt, ...) \
58 do { } while (0)
59 #endif
61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
64 KVM_CAP_LAST_INFO
67 static int cap_interrupt_unset = false;
68 static int cap_interrupt_level = false;
69 static int cap_segstate;
70 static int cap_booke_sregs;
71 static int cap_ppc_smt;
72 static int cap_ppc_rma;
73 static int cap_spapr_tce;
74 static int cap_spapr_multitce;
75 static int cap_spapr_vfio;
76 static int cap_hior;
77 static int cap_one_reg;
78 static int cap_epr;
79 static int cap_ppc_watchdog;
80 static int cap_papr;
81 static int cap_htab_fd;
82 static int cap_fixup_hcalls;
84 static uint32_t debug_inst_opcode;
86 /* XXX We have a race condition where we actually have a level triggered
87 * interrupt, but the infrastructure can't expose that yet, so the guest
88 * takes but ignores it, goes to sleep and never gets notified that there's
89 * still an interrupt pending.
91 * As a quick workaround, let's just wake up again 20 ms after we injected
92 * an interrupt. That way we can assure that we're always reinjecting
93 * interrupts in case the guest swallowed them.
95 static QEMUTimer *idle_timer;
97 static void kvm_kick_cpu(void *opaque)
99 PowerPCCPU *cpu = opaque;
101 qemu_cpu_kick(CPU(cpu));
104 static int kvm_ppc_register_host_cpu_type(void);
106 int kvm_arch_init(MachineState *ms, KVMState *s)
108 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
109 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
110 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
111 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
112 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
113 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
114 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
115 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
116 cap_spapr_vfio = false;
117 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
118 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
119 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
120 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
121 /* Note: we don't set cap_papr here, because this capability is
122 * only activated after this by kvmppc_set_papr() */
123 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
124 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
126 if (!cap_interrupt_level) {
127 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
128 "VM to stall at times!\n");
131 kvm_ppc_register_host_cpu_type();
133 return 0;
136 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
138 CPUPPCState *cenv = &cpu->env;
139 CPUState *cs = CPU(cpu);
140 struct kvm_sregs sregs;
141 int ret;
143 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
144 /* What we're really trying to say is "if we're on BookE, we use
145 the native PVR for now". This is the only sane way to check
146 it though, so we potentially confuse users that they can run
147 BookE guests on BookS. Let's hope nobody dares enough :) */
148 return 0;
149 } else {
150 if (!cap_segstate) {
151 fprintf(stderr, "kvm error: missing PVR setting capability\n");
152 return -ENOSYS;
156 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
157 if (ret) {
158 return ret;
161 sregs.pvr = cenv->spr[SPR_PVR];
162 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
165 /* Set up a shared TLB array with KVM */
166 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
168 CPUPPCState *env = &cpu->env;
169 CPUState *cs = CPU(cpu);
170 struct kvm_book3e_206_tlb_params params = {};
171 struct kvm_config_tlb cfg = {};
172 unsigned int entries = 0;
173 int ret, i;
175 if (!kvm_enabled() ||
176 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
177 return 0;
180 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
182 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
183 params.tlb_sizes[i] = booke206_tlb_size(env, i);
184 params.tlb_ways[i] = booke206_tlb_ways(env, i);
185 entries += params.tlb_sizes[i];
188 assert(entries == env->nb_tlb);
189 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
191 env->tlb_dirty = true;
193 cfg.array = (uintptr_t)env->tlb.tlbm;
194 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
195 cfg.params = (uintptr_t)&params;
196 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
198 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
199 if (ret < 0) {
200 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
201 __func__, strerror(-ret));
202 return ret;
205 env->kvm_sw_tlb = true;
206 return 0;
210 #if defined(TARGET_PPC64)
211 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
212 struct kvm_ppc_smmu_info *info)
214 CPUPPCState *env = &cpu->env;
215 CPUState *cs = CPU(cpu);
217 memset(info, 0, sizeof(*info));
219 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
220 * need to "guess" what the supported page sizes are.
222 * For that to work we make a few assumptions:
224 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
225 * KVM which only supports 4K and 16M pages, but supports them
226 * regardless of the backing store characteritics. We also don't
227 * support 1T segments.
229 * This is safe as if HV KVM ever supports that capability or PR
230 * KVM grows supports for more page/segment sizes, those versions
231 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
232 * will not hit this fallback
234 * - Else we are running HV KVM. This means we only support page
235 * sizes that fit in the backing store. Additionally we only
236 * advertize 64K pages if the processor is ARCH 2.06 and we assume
237 * P7 encodings for the SLB and hash table. Here too, we assume
238 * support for any newer processor will mean a kernel that
239 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
240 * this fallback.
242 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
243 /* No flags */
244 info->flags = 0;
245 info->slb_size = 64;
247 /* Standard 4k base page size segment */
248 info->sps[0].page_shift = 12;
249 info->sps[0].slb_enc = 0;
250 info->sps[0].enc[0].page_shift = 12;
251 info->sps[0].enc[0].pte_enc = 0;
253 /* Standard 16M large page size segment */
254 info->sps[1].page_shift = 24;
255 info->sps[1].slb_enc = SLB_VSID_L;
256 info->sps[1].enc[0].page_shift = 24;
257 info->sps[1].enc[0].pte_enc = 0;
258 } else {
259 int i = 0;
261 /* HV KVM has backing store size restrictions */
262 info->flags = KVM_PPC_PAGE_SIZES_REAL;
264 if (env->mmu_model & POWERPC_MMU_1TSEG) {
265 info->flags |= KVM_PPC_1T_SEGMENTS;
268 if (env->mmu_model == POWERPC_MMU_2_06 ||
269 env->mmu_model == POWERPC_MMU_2_07) {
270 info->slb_size = 32;
271 } else {
272 info->slb_size = 64;
275 /* Standard 4k base page size segment */
276 info->sps[i].page_shift = 12;
277 info->sps[i].slb_enc = 0;
278 info->sps[i].enc[0].page_shift = 12;
279 info->sps[i].enc[0].pte_enc = 0;
280 i++;
282 /* 64K on MMU 2.06 and later */
283 if (env->mmu_model == POWERPC_MMU_2_06 ||
284 env->mmu_model == POWERPC_MMU_2_07) {
285 info->sps[i].page_shift = 16;
286 info->sps[i].slb_enc = 0x110;
287 info->sps[i].enc[0].page_shift = 16;
288 info->sps[i].enc[0].pte_enc = 1;
289 i++;
292 /* Standard 16M large page size segment */
293 info->sps[i].page_shift = 24;
294 info->sps[i].slb_enc = SLB_VSID_L;
295 info->sps[i].enc[0].page_shift = 24;
296 info->sps[i].enc[0].pte_enc = 0;
300 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
302 CPUState *cs = CPU(cpu);
303 int ret;
305 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
306 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
307 if (ret == 0) {
308 return;
312 kvm_get_fallback_smmu_info(cpu, info);
/* Return the page size backing mem_path: the hugepage size when the path
 * lives on hugetlbfs, the normal host page size otherwise.
 * Exits the process if the filesystem cannot be statfs()ed. */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    /* statfs can be interrupted; retry on EINTR */
    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
342 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
343 * may or may not name the same files / on the same filesystem now as
344 * when we actually open and map them. Iterate over the file
345 * descriptors instead, and use qemu_fd_getpagesize().
347 static int find_max_supported_pagesize(Object *obj, void *opaque)
349 char *mem_path;
350 long *hpsize_min = opaque;
352 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
353 mem_path = object_property_get_str(obj, "mem-path", NULL);
354 if (mem_path) {
355 long hpsize = gethugepagesize(mem_path);
356 if (hpsize < *hpsize_min) {
357 *hpsize_min = hpsize;
359 } else {
360 *hpsize_min = getpagesize();
364 return 0;
367 static long getrampagesize(void)
369 long hpsize = LONG_MAX;
370 long mainrampagesize;
371 Object *memdev_root;
373 if (mem_path) {
374 mainrampagesize = gethugepagesize(mem_path);
375 } else {
376 mainrampagesize = getpagesize();
379 /* it's possible we have memory-backend objects with
380 * hugepage-backed RAM. these may get mapped into system
381 * address space via -numa parameters or memory hotplug
382 * hooks. we want to take these into account, but we
383 * also want to make sure these supported hugepage
384 * sizes are applicable across the entire range of memory
385 * we may boot from, so we take the min across all
386 * backends, and assume normal pages in cases where a
387 * backend isn't backed by hugepages.
389 memdev_root = object_resolve_path("/objects", NULL);
390 if (memdev_root) {
391 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
393 if (hpsize == LONG_MAX) {
394 /* No additional memory regions found ==> Report main RAM page size */
395 return mainrampagesize;
398 /* If NUMA is disabled or the NUMA nodes are not backed with a
399 * memory-backend, then there is at least one node using "normal" RAM,
400 * so if its page size is smaller we have got to report that size instead.
402 if (hpsize > mainrampagesize &&
403 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
404 static bool warned;
405 if (!warned) {
406 error_report("Huge page support disabled (n/a for main memory).");
407 warned = true;
409 return mainrampagesize;
412 return hpsize;
415 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
417 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
418 return true;
421 return (1ul << shift) <= rampgsize;
424 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
426 static struct kvm_ppc_smmu_info smmu_info;
427 static bool has_smmu_info;
428 CPUPPCState *env = &cpu->env;
429 long rampagesize;
430 int iq, ik, jq, jk;
431 bool has_64k_pages = false;
433 /* We only handle page sizes for 64-bit server guests for now */
434 if (!(env->mmu_model & POWERPC_MMU_64)) {
435 return;
438 /* Collect MMU info from kernel if not already */
439 if (!has_smmu_info) {
440 kvm_get_smmu_info(cpu, &smmu_info);
441 has_smmu_info = true;
444 rampagesize = getrampagesize();
446 /* Convert to QEMU form */
447 memset(&env->sps, 0, sizeof(env->sps));
449 /* If we have HV KVM, we need to forbid CI large pages if our
450 * host page size is smaller than 64K.
452 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
453 env->ci_large_pages = getpagesize() >= 0x10000;
457 * XXX This loop should be an entry wide AND of the capabilities that
458 * the selected CPU has with the capabilities that KVM supports.
460 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
461 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
462 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
464 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
465 ksps->page_shift)) {
466 continue;
468 qsps->page_shift = ksps->page_shift;
469 qsps->slb_enc = ksps->slb_enc;
470 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
471 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
472 ksps->enc[jk].page_shift)) {
473 continue;
475 if (ksps->enc[jk].page_shift == 16) {
476 has_64k_pages = true;
478 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
479 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
480 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
481 break;
484 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
485 break;
488 env->slb_nr = smmu_info.slb_size;
489 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
490 env->mmu_model &= ~POWERPC_MMU_1TSEG;
492 if (!has_64k_pages) {
493 env->mmu_model &= ~POWERPC_MMU_64K;
496 #else /* defined (TARGET_PPC64) */
498 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
502 #endif /* !defined (TARGET_PPC64) */
504 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
506 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
509 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
510 * book3s supports only 1 watchpoint, so array size
511 * of 4 is sufficient for now.
513 #define MAX_HW_BKPTS 4
515 static struct HWBreakpoint {
516 target_ulong addr;
517 int type;
518 } hw_debug_points[MAX_HW_BKPTS];
520 static CPUWatchpoint hw_watchpoint;
522 /* Default there is no breakpoint and watchpoint supported */
523 static int max_hw_breakpoint;
524 static int max_hw_watchpoint;
525 static int nb_hw_breakpoint;
526 static int nb_hw_watchpoint;
528 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
530 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
531 max_hw_breakpoint = 2;
532 max_hw_watchpoint = 2;
535 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
536 fprintf(stderr, "Error initializing h/w breakpoints\n");
537 return;
541 int kvm_arch_init_vcpu(CPUState *cs)
543 PowerPCCPU *cpu = POWERPC_CPU(cs);
544 CPUPPCState *cenv = &cpu->env;
545 int ret;
547 /* Gather server mmu info from KVM and update the CPU state */
548 kvm_fixup_page_sizes(cpu);
550 /* Synchronize sregs with kvm */
551 ret = kvm_arch_sync_sregs(cpu);
552 if (ret) {
553 if (ret == -EINVAL) {
554 error_report("Register sync failed... If you're using kvm-hv.ko,"
555 " only \"-cpu host\" is possible");
557 return ret;
560 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
562 /* Some targets support access to KVM's guest TLB. */
563 switch (cenv->mmu_model) {
564 case POWERPC_MMU_BOOKE206:
565 ret = kvm_booke206_tlb_init(cpu);
566 break;
567 default:
568 break;
571 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
572 kvmppc_hw_debug_points_init(cenv);
574 return ret;
577 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
579 CPUPPCState *env = &cpu->env;
580 CPUState *cs = CPU(cpu);
581 struct kvm_dirty_tlb dirty_tlb;
582 unsigned char *bitmap;
583 int ret;
585 if (!env->kvm_sw_tlb) {
586 return;
589 bitmap = g_malloc((env->nb_tlb + 7) / 8);
590 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
592 dirty_tlb.bitmap = (uintptr_t)bitmap;
593 dirty_tlb.num_dirty = env->nb_tlb;
595 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
596 if (ret) {
597 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
598 __func__, strerror(-ret));
601 g_free(bitmap);
604 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
606 PowerPCCPU *cpu = POWERPC_CPU(cs);
607 CPUPPCState *env = &cpu->env;
608 union {
609 uint32_t u32;
610 uint64_t u64;
611 } val;
612 struct kvm_one_reg reg = {
613 .id = id,
614 .addr = (uintptr_t) &val,
616 int ret;
618 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
619 if (ret != 0) {
620 trace_kvm_failed_spr_get(spr, strerror(errno));
621 } else {
622 switch (id & KVM_REG_SIZE_MASK) {
623 case KVM_REG_SIZE_U32:
624 env->spr[spr] = val.u32;
625 break;
627 case KVM_REG_SIZE_U64:
628 env->spr[spr] = val.u64;
629 break;
631 default:
632 /* Don't handle this size yet */
633 abort();
638 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
640 PowerPCCPU *cpu = POWERPC_CPU(cs);
641 CPUPPCState *env = &cpu->env;
642 union {
643 uint32_t u32;
644 uint64_t u64;
645 } val;
646 struct kvm_one_reg reg = {
647 .id = id,
648 .addr = (uintptr_t) &val,
650 int ret;
652 switch (id & KVM_REG_SIZE_MASK) {
653 case KVM_REG_SIZE_U32:
654 val.u32 = env->spr[spr];
655 break;
657 case KVM_REG_SIZE_U64:
658 val.u64 = env->spr[spr];
659 break;
661 default:
662 /* Don't handle this size yet */
663 abort();
666 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
667 if (ret != 0) {
668 trace_kvm_failed_spr_set(spr, strerror(errno));
672 static int kvm_put_fp(CPUState *cs)
674 PowerPCCPU *cpu = POWERPC_CPU(cs);
675 CPUPPCState *env = &cpu->env;
676 struct kvm_one_reg reg;
677 int i;
678 int ret;
680 if (env->insns_flags & PPC_FLOAT) {
681 uint64_t fpscr = env->fpscr;
682 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
684 reg.id = KVM_REG_PPC_FPSCR;
685 reg.addr = (uintptr_t)&fpscr;
686 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
687 if (ret < 0) {
688 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
689 return ret;
692 for (i = 0; i < 32; i++) {
693 uint64_t vsr[2];
695 #ifdef HOST_WORDS_BIGENDIAN
696 vsr[0] = float64_val(env->fpr[i]);
697 vsr[1] = env->vsr[i];
698 #else
699 vsr[0] = env->vsr[i];
700 vsr[1] = float64_val(env->fpr[i]);
701 #endif
702 reg.addr = (uintptr_t) &vsr;
703 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
705 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
706 if (ret < 0) {
707 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
708 i, strerror(errno));
709 return ret;
714 if (env->insns_flags & PPC_ALTIVEC) {
715 reg.id = KVM_REG_PPC_VSCR;
716 reg.addr = (uintptr_t)&env->vscr;
717 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
718 if (ret < 0) {
719 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
720 return ret;
723 for (i = 0; i < 32; i++) {
724 reg.id = KVM_REG_PPC_VR(i);
725 reg.addr = (uintptr_t)&env->avr[i];
726 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
727 if (ret < 0) {
728 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
729 return ret;
734 return 0;
737 static int kvm_get_fp(CPUState *cs)
739 PowerPCCPU *cpu = POWERPC_CPU(cs);
740 CPUPPCState *env = &cpu->env;
741 struct kvm_one_reg reg;
742 int i;
743 int ret;
745 if (env->insns_flags & PPC_FLOAT) {
746 uint64_t fpscr;
747 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
749 reg.id = KVM_REG_PPC_FPSCR;
750 reg.addr = (uintptr_t)&fpscr;
751 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
752 if (ret < 0) {
753 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
754 return ret;
755 } else {
756 env->fpscr = fpscr;
759 for (i = 0; i < 32; i++) {
760 uint64_t vsr[2];
762 reg.addr = (uintptr_t) &vsr;
763 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
765 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
766 if (ret < 0) {
767 DPRINTF("Unable to get %s%d from KVM: %s\n",
768 vsx ? "VSR" : "FPR", i, strerror(errno));
769 return ret;
770 } else {
771 #ifdef HOST_WORDS_BIGENDIAN
772 env->fpr[i] = vsr[0];
773 if (vsx) {
774 env->vsr[i] = vsr[1];
776 #else
777 env->fpr[i] = vsr[1];
778 if (vsx) {
779 env->vsr[i] = vsr[0];
781 #endif
786 if (env->insns_flags & PPC_ALTIVEC) {
787 reg.id = KVM_REG_PPC_VSCR;
788 reg.addr = (uintptr_t)&env->vscr;
789 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
790 if (ret < 0) {
791 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
792 return ret;
795 for (i = 0; i < 32; i++) {
796 reg.id = KVM_REG_PPC_VR(i);
797 reg.addr = (uintptr_t)&env->avr[i];
798 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
799 if (ret < 0) {
800 DPRINTF("Unable to get VR%d from KVM: %s\n",
801 i, strerror(errno));
802 return ret;
807 return 0;
810 #if defined(TARGET_PPC64)
811 static int kvm_get_vpa(CPUState *cs)
813 PowerPCCPU *cpu = POWERPC_CPU(cs);
814 CPUPPCState *env = &cpu->env;
815 struct kvm_one_reg reg;
816 int ret;
818 reg.id = KVM_REG_PPC_VPA_ADDR;
819 reg.addr = (uintptr_t)&env->vpa_addr;
820 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
821 if (ret < 0) {
822 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
823 return ret;
826 assert((uintptr_t)&env->slb_shadow_size
827 == ((uintptr_t)&env->slb_shadow_addr + 8));
828 reg.id = KVM_REG_PPC_VPA_SLB;
829 reg.addr = (uintptr_t)&env->slb_shadow_addr;
830 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
831 if (ret < 0) {
832 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
833 strerror(errno));
834 return ret;
837 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
838 reg.id = KVM_REG_PPC_VPA_DTL;
839 reg.addr = (uintptr_t)&env->dtl_addr;
840 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
841 if (ret < 0) {
842 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
843 strerror(errno));
844 return ret;
847 return 0;
850 static int kvm_put_vpa(CPUState *cs)
852 PowerPCCPU *cpu = POWERPC_CPU(cs);
853 CPUPPCState *env = &cpu->env;
854 struct kvm_one_reg reg;
855 int ret;
857 /* SLB shadow or DTL can't be registered unless a master VPA is
858 * registered. That means when restoring state, if a VPA *is*
859 * registered, we need to set that up first. If not, we need to
860 * deregister the others before deregistering the master VPA */
861 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
863 if (env->vpa_addr) {
864 reg.id = KVM_REG_PPC_VPA_ADDR;
865 reg.addr = (uintptr_t)&env->vpa_addr;
866 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
867 if (ret < 0) {
868 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
869 return ret;
873 assert((uintptr_t)&env->slb_shadow_size
874 == ((uintptr_t)&env->slb_shadow_addr + 8));
875 reg.id = KVM_REG_PPC_VPA_SLB;
876 reg.addr = (uintptr_t)&env->slb_shadow_addr;
877 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
878 if (ret < 0) {
879 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
880 return ret;
883 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
884 reg.id = KVM_REG_PPC_VPA_DTL;
885 reg.addr = (uintptr_t)&env->dtl_addr;
886 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
887 if (ret < 0) {
888 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
889 strerror(errno));
890 return ret;
893 if (!env->vpa_addr) {
894 reg.id = KVM_REG_PPC_VPA_ADDR;
895 reg.addr = (uintptr_t)&env->vpa_addr;
896 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
897 if (ret < 0) {
898 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
899 return ret;
903 return 0;
905 #endif /* TARGET_PPC64 */
907 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
909 CPUPPCState *env = &cpu->env;
910 struct kvm_sregs sregs;
911 int i;
913 sregs.pvr = env->spr[SPR_PVR];
915 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
917 /* Sync SLB */
918 #ifdef TARGET_PPC64
919 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
920 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
921 if (env->slb[i].esid & SLB_ESID_V) {
922 sregs.u.s.ppc64.slb[i].slbe |= i;
924 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
926 #endif
928 /* Sync SRs */
929 for (i = 0; i < 16; i++) {
930 sregs.u.s.ppc32.sr[i] = env->sr[i];
933 /* Sync BATs */
934 for (i = 0; i < 8; i++) {
935 /* Beware. We have to swap upper and lower bits here */
936 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
937 | env->DBAT[1][i];
938 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
939 | env->IBAT[1][i];
942 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
945 int kvm_arch_put_registers(CPUState *cs, int level)
947 PowerPCCPU *cpu = POWERPC_CPU(cs);
948 CPUPPCState *env = &cpu->env;
949 struct kvm_regs regs;
950 int ret;
951 int i;
953 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
954 if (ret < 0) {
955 return ret;
958 regs.ctr = env->ctr;
959 regs.lr = env->lr;
960 regs.xer = cpu_read_xer(env);
961 regs.msr = env->msr;
962 regs.pc = env->nip;
964 regs.srr0 = env->spr[SPR_SRR0];
965 regs.srr1 = env->spr[SPR_SRR1];
967 regs.sprg0 = env->spr[SPR_SPRG0];
968 regs.sprg1 = env->spr[SPR_SPRG1];
969 regs.sprg2 = env->spr[SPR_SPRG2];
970 regs.sprg3 = env->spr[SPR_SPRG3];
971 regs.sprg4 = env->spr[SPR_SPRG4];
972 regs.sprg5 = env->spr[SPR_SPRG5];
973 regs.sprg6 = env->spr[SPR_SPRG6];
974 regs.sprg7 = env->spr[SPR_SPRG7];
976 regs.pid = env->spr[SPR_BOOKE_PID];
978 for (i = 0;i < 32; i++)
979 regs.gpr[i] = env->gpr[i];
981 regs.cr = 0;
982 for (i = 0; i < 8; i++) {
983 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
986 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
987 if (ret < 0)
988 return ret;
990 kvm_put_fp(cs);
992 if (env->tlb_dirty) {
993 kvm_sw_tlb_put(cpu);
994 env->tlb_dirty = false;
997 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
998 ret = kvmppc_put_books_sregs(cpu);
999 if (ret < 0) {
1000 return ret;
1004 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1005 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1008 if (cap_one_reg) {
1009 int i;
1011 /* We deliberately ignore errors here, for kernels which have
1012 * the ONE_REG calls, but don't support the specific
1013 * registers, there's a reasonable chance things will still
1014 * work, at least until we try to migrate. */
1015 for (i = 0; i < 1024; i++) {
1016 uint64_t id = env->spr_cb[i].one_reg_id;
1018 if (id != 0) {
1019 kvm_put_one_spr(cs, id, i);
1023 #ifdef TARGET_PPC64
1024 if (msr_ts) {
1025 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1026 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1028 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1029 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1031 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1032 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1033 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1034 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1035 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1036 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1037 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1038 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1039 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1040 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1043 if (cap_papr) {
1044 if (kvm_put_vpa(cs) < 0) {
1045 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1049 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1050 #endif /* TARGET_PPC64 */
1053 return ret;
1056 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1058 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1061 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1063 CPUPPCState *env = &cpu->env;
1064 struct kvm_sregs sregs;
1065 int ret;
1067 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1068 if (ret < 0) {
1069 return ret;
1072 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1073 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1074 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1075 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1076 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1077 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1078 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1079 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1080 env->spr[SPR_DECR] = sregs.u.e.dec;
1081 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1082 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1083 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1086 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1087 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1088 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1089 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1090 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1091 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1094 if (sregs.u.e.features & KVM_SREGS_E_64) {
1095 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1098 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1099 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1102 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1103 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1104 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1105 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1106 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1107 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1108 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1109 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1110 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1111 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1112 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1113 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1114 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1115 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1116 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1117 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1118 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1119 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1120 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1121 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1122 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1123 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1124 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1125 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1126 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1127 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1128 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1129 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1130 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1131 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1132 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1133 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1134 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1136 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1137 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1138 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1139 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1140 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1141 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1142 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1145 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1146 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1147 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1150 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1151 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1152 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1153 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1154 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1158 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1159 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1160 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1161 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1162 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1163 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1164 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1165 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1166 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1167 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1168 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1171 if (sregs.u.e.features & KVM_SREGS_EXP) {
1172 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1175 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1176 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1177 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1180 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1181 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1182 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1183 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1185 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1186 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1187 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1191 return 0;
/* Read the Book3S MMU state (SDR1, SLB, SRs, BATs) from KVM into env.
 * Returns 0 on success or the negative errno from KVM_GET_SREGS.
 */
static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    /* Only update SDR1 when QEMU owns the hash table */
    if (!env->external_htab) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs (32-bit segment registers) */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs: KVM packs upper/lower halves into one 64-bit value */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}
1247 int kvm_arch_get_registers(CPUState *cs)
1249 PowerPCCPU *cpu = POWERPC_CPU(cs);
1250 CPUPPCState *env = &cpu->env;
1251 struct kvm_regs regs;
1252 uint32_t cr;
1253 int i, ret;
1255 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1256 if (ret < 0)
1257 return ret;
1259 cr = regs.cr;
1260 for (i = 7; i >= 0; i--) {
1261 env->crf[i] = cr & 15;
1262 cr >>= 4;
1265 env->ctr = regs.ctr;
1266 env->lr = regs.lr;
1267 cpu_write_xer(env, regs.xer);
1268 env->msr = regs.msr;
1269 env->nip = regs.pc;
1271 env->spr[SPR_SRR0] = regs.srr0;
1272 env->spr[SPR_SRR1] = regs.srr1;
1274 env->spr[SPR_SPRG0] = regs.sprg0;
1275 env->spr[SPR_SPRG1] = regs.sprg1;
1276 env->spr[SPR_SPRG2] = regs.sprg2;
1277 env->spr[SPR_SPRG3] = regs.sprg3;
1278 env->spr[SPR_SPRG4] = regs.sprg4;
1279 env->spr[SPR_SPRG5] = regs.sprg5;
1280 env->spr[SPR_SPRG6] = regs.sprg6;
1281 env->spr[SPR_SPRG7] = regs.sprg7;
1283 env->spr[SPR_BOOKE_PID] = regs.pid;
1285 for (i = 0;i < 32; i++)
1286 env->gpr[i] = regs.gpr[i];
1288 kvm_get_fp(cs);
1290 if (cap_booke_sregs) {
1291 ret = kvmppc_get_booke_sregs(cpu);
1292 if (ret < 0) {
1293 return ret;
1297 if (cap_segstate) {
1298 ret = kvmppc_get_books_sregs(cpu);
1299 if (ret < 0) {
1300 return ret;
1304 if (cap_hior) {
1305 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1308 if (cap_one_reg) {
1309 int i;
1311 /* We deliberately ignore errors here, for kernels which have
1312 * the ONE_REG calls, but don't support the specific
1313 * registers, there's a reasonable chance things will still
1314 * work, at least until we try to migrate. */
1315 for (i = 0; i < 1024; i++) {
1316 uint64_t id = env->spr_cb[i].one_reg_id;
1318 if (id != 0) {
1319 kvm_get_one_spr(cs, id, i);
1323 #ifdef TARGET_PPC64
1324 if (msr_ts) {
1325 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1326 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1328 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1329 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1331 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1332 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1333 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1334 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1335 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1336 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1337 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1338 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1339 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1340 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1343 if (cap_papr) {
1344 if (kvm_get_vpa(cs) < 0) {
1345 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1349 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1350 #endif
1353 return 0;
1356 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1358 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1360 if (irq != PPC_INTERRUPT_EXT) {
1361 return 0;
1364 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1365 return 0;
1368 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1370 return 0;
1373 #if defined(TARGET_PPCEMB)
1374 #define PPC_INPUT_INT PPC40x_INPUT_INT
1375 #elif defined(TARGET_PPC64)
1376 #define PPC_INPUT_INT PPC970_INPUT_INT
1377 #else
1378 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1379 #endif
/* Called with the vCPU about to enter KVM_RUN: on kernels without
 * level-triggered interrupt support, manually inject a pending external
 * interrupt and re-arm the idle timer so level interrupts get re-checked.
 */
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (NANOSECONDS_PER_SECOND / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
}
/* Nothing to do after KVM_RUN on PPC; no special memory attributes. */
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}
/* Report whether the vCPU is halted; KVM handles PPC async events itself. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
/* Handle a KVM_EXIT_HLT: halt the vCPU unless a hard interrupt is already
 * pending while external interrupts are enabled (MSR_EE).
 * Always returns 0 (resume the run loop).
 */
static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}
1444 /* map dcr access to existing qemu dcr emulation */
1445 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1447 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1448 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1450 return 0;
1453 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1455 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1456 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1458 return 0;
/* Install a software breakpoint: save the original instruction in
 * bp->saved_insn, then overwrite it with the debug trap opcode.
 * Returns 0 on success, -EINVAL if guest memory cannot be accessed.
 */
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}
/* Remove a software breakpoint: verify the trap opcode is still in place,
 * then restore the saved original instruction.
 * Returns 0 on success, -EINVAL on memory-access failure or if the
 * instruction at bp->pc is no longer our trap opcode.
 */
int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint32_t sc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}
1489 static int find_hw_breakpoint(target_ulong addr, int type)
1491 int n;
1493 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1494 <= ARRAY_SIZE(hw_debug_points));
1496 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1497 if (hw_debug_points[n].addr == addr &&
1498 hw_debug_points[n].type == type) {
1499 return n;
1503 return -1;
1506 static int find_hw_watchpoint(target_ulong addr, int *flag)
1508 int n;
1510 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1511 if (n >= 0) {
1512 *flag = BP_MEM_ACCESS;
1513 return n;
1516 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1517 if (n >= 0) {
1518 *flag = BP_MEM_WRITE;
1519 return n;
1522 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1523 if (n >= 0) {
1524 *flag = BP_MEM_READ;
1525 return n;
1528 return -1;
/* Record a hardware breakpoint or watchpoint in hw_debug_points.
 * Returns 0 on success, -ENOBUFS when the shared table or the per-kind
 * hardware limit is full, -EEXIST for duplicates, -ENOSYS for unknown
 * types. The entry is written into the next free slot before the type
 * checks; the slot only becomes live once nb_hw_* is incremented.
 */
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}
/* Remove a hardware breakpoint/watchpoint. The freed slot is filled with
 * the last live entry, so the table stays densely packed.
 * Returns 0 on success, -ENOENT if not found, -ENOSYS for unknown types.
 */
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    /* Compact: move the last entry into the vacated slot */
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}
/* Drop all hardware breakpoints and watchpoints by resetting the counters;
 * the table contents become dead without being cleared. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}
/* Translate QEMU's breakpoint/watchpoint bookkeeping into the
 * kvm_guest_debug control structure passed to KVM before each run.
 */
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    /* Hardware breakpoint/watchpoint updates */
    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                /* GDB "access" maps to read|write in the KVM interface */
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}
/* Handle a KVM_EXIT_DEBUG. Returns non-zero when the exit belongs to a
 * QEMU-owned debug facility (single-step, hw/sw break/watchpoint) and the
 * run loop should stop with EXCP_DEBUG; returns 0 when the exception came
 * from the guest itself, in which case a program interrupt is injected
 * (see the long rationale comment below) and the guest is resumed.
 */
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle debug exception, so inject
         * program exception to guest;
         * Yes program exception NOT debug exception !!
         * When QEMU is using debug resources then debug exception must
         * be always set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so guest cannot change MSR_DE.
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations are NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, only possibility is guest executed a
         * privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}
/* Dispatch a KVM exit reason to the matching PPC handler.
 * Returns 0 to re-enter the guest, EXCP_DEBUG to stop for the debugger,
 * or a negative value on unknown exit reasons. Runs under the iothread
 * lock because handlers touch global device/CPU state.
 */
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        /* MPIC proxy: fetch the interrupt vector on the guest's behalf */
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}
1776 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1778 CPUState *cs = CPU(cpu);
1779 uint32_t bits = tsr_bits;
1780 struct kvm_one_reg reg = {
1781 .id = KVM_REG_PPC_OR_TSR,
1782 .addr = (uintptr_t) &bits,
1785 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1788 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1791 CPUState *cs = CPU(cpu);
1792 uint32_t bits = tsr_bits;
1793 struct kvm_one_reg reg = {
1794 .id = KVM_REG_PPC_CLEAR_TSR,
1795 .addr = (uintptr_t) &bits,
1798 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1801 int kvmppc_set_tcr(PowerPCCPU *cpu)
1803 CPUState *cs = CPU(cpu);
1804 CPUPPCState *env = &cpu->env;
1805 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1807 struct kvm_one_reg reg = {
1808 .id = KVM_REG_PPC_TCR,
1809 .addr = (uintptr_t) &tcr,
1812 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1815 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1817 CPUState *cs = CPU(cpu);
1818 int ret;
1820 if (!kvm_enabled()) {
1821 return -1;
1824 if (!cap_ppc_watchdog) {
1825 printf("warning: KVM does not support watchdog");
1826 return -1;
1829 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1830 if (ret < 0) {
1831 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1832 __func__, strerror(-ret));
1833 return ret;
1836 return ret;
/* Scan /proc/cpuinfo for a line starting with 'field' and copy the whole
 * line (prefix included, bounded by 'len') into 'value'.
 * Returns 0 when found, -1 on open failure or when the field is absent.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while(*line);

    fclose(f);

    return ret;
}
/* Read the host timebase frequency from the "timebase" line of
 * /proc/cpuinfo. Falls back to NANOSECONDS_PER_SECOND when the line is
 * missing or malformed.
 * NOTE(review): atoi() silently truncates on overflow and ignores
 * trailing junk after the number — presumably fine for the kernel's
 * "timebase : <n>" format, but worth confirming.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = NANOSECONDS_PER_SECOND;

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    return atoi(ns);
}
/* Read the host serial number from the device tree into *value
 * (caller frees). Returns false if the node cannot be read. */
bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}
/* Read the host model string from the device tree into *value
 * (caller frees). Returns false if the node cannot be read. */
bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
/* Try to find a device tree node for a CPU with clock-frequency property */

/* On success, 'buf' holds the path of the first CPU node under
 * /proc/device-tree/cpus/ that has a clock-frequency property and 0 is
 * returned; on failure -1 is returned and a message is printed.
 */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        /* Probe for the property by trying to open it */
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            /* Found one: rewrite buf to hold the node path itself */
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
/* Read a big-endian 32- or 64-bit integer property from a device-tree
 * file. Returns the host-endian value, 0 for unexpected lengths, and
 * (uint64_t)-1 when the file cannot be opened.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX], *tmp;
    uint64_t val;

    /* (uint64_t)-1 when no suitable CPU node exists at all */
    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    tmp = g_strdup_printf("%s/%s", buf, propname);
    val = kvmppc_read_int_dt(tmp);
    g_free(tmp);

    return val;
}
/* Host CPU clock frequency from the device tree; see
 * kvmppc_read_int_cpu_dt() for the error encodings. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host AltiVec/VSX capability level from the "ibm,vmx" device-tree
 * property; see kvmppc_read_int_cpu_dt() for error encodings. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host decimal-FP capability from the "ibm,dfp" device-tree property;
 * see kvmppc_read_int_cpu_dt() for error encodings. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1992 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1994 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1995 CPUState *cs = CPU(cpu);
1997 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1998 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1999 return 0;
2002 return 1;
2005 int kvmppc_get_hasidle(CPUPPCState *env)
2007 struct kvm_ppc_pvinfo pvinfo;
2009 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2010 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2011 return 1;
2014 return 0;
/* Copy the KVM hypercall instruction sequence into buf. Returns 0 when
 * KVM supplied one; otherwise writes a 4-instruction always-fail stub
 * that works in either endianness and returns 1.
 */
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);

        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 1;
}
/* Ask KVM to pass the given hypercall number through its in-kernel
 * handler; returns the enable-cap ioctl result. */
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}
/* Enable in-kernel handling of the H_LOGICAL_CI_{LOAD,STORE} hcalls;
 * failures are deliberately ignored (best effort). */
void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}
/* Enable in-kernel handling of H_SET_MODE (best effort, result ignored). */
void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}
/* Enable in-kernel handling of H_CLEAR_REF / H_CLEAR_MOD
 * (best effort, results ignored). */
void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}
2072 void kvmppc_set_papr(PowerPCCPU *cpu)
2074 CPUState *cs = CPU(cpu);
2075 int ret;
2077 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2078 if (ret) {
2079 error_report("This vCPU type or KVM version does not support PAPR");
2080 exit(1);
2083 /* Update the capability flag so we sync the right information
2084 * with kvm */
2085 cap_papr = 1;
/* Tell KVM which architecture compatibility level (PVR) the guest should
 * see; returns the set-one-reg result. */
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
}
2093 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2095 CPUState *cs = CPU(cpu);
2096 int ret;
2098 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2099 if (ret && mpic_proxy) {
2100 error_report("This KVM version does not support EPR");
2101 exit(1);
/* Number of SMT threads per core supported by the host KVM,
 * defaulting to 1 when the capability was not reported. */
int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
2110 #ifdef TARGET_PPC64
/* Allocate and map a contiguous Real Mode Area through KVM when the host
 * requires one. Returns the mapped size (capped at 256MiB) with *rma set
 * to the mapping, 0 when no contiguous RMA is needed, or -1 on error.
 * NOTE(review): the fd from KVM_ALLOCATE_RMA is never closed, including
 * on the mmap failure path — presumably the fd must stay open to keep
 * the RMA alive, but the failure-path leak looks unintended; confirm.
 */
off_t kvmppc_alloc_rma(void **rma)
{
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    };

    return size;
}
/* Clamp the requested RMA size to what the VRMA mechanism can cover for
 * the given hash table size. When KVM requires a real contiguous RMA
 * (cap_ppc_rma >= 2) the size is already hardware-constrained and is
 * returned unchanged.
 */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    /* VRMA coverage scales with the HPT size: 1/128th per segment page */
    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
2179 #endif
/* True when the host KVM accelerates H_PUT_TCE_INDIRECT/H_STUFF_TCE. */
bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}
/* Create an in-kernel TCE (IOMMU) table for the given LIOBN and mmap it
 * into QEMU. On success returns the mapped table and stores the fd in
 * *pfd; on failure (capability missing, ioctl or mmap error) returns
 * NULL with *pfd == -1.
 */
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
                              bool need_vfio)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    /* One 64-bit TCE entry per IOMMU page in the window */
    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
/* Unmap and close a TCE table created by kvmppc_create_spapr_tce().
 * Returns 0 on success; -1 when fd < 0 (table was never created by KVM).
 * On munmap/close failure the error is reported and the table is
 * deliberately leaked rather than left half-torn-down.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
/* Ask the kernel to allocate/reset the guest hash page table.
 * Returns the shift of the kernel-allocated HPT, 0 when QEMU must
 * allocate the HPT itself (TCG, PR KVM, or broken PR advertisement),
 * or a negative errno on other ioctl failures.
 */
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already. Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
/* Read the host Processor Version Register via the mfpvr instruction
 * (host must be PPC for this file to be built). */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
/* Set (on == true) or clear (on == false) the given flag bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
/* Instance init for the "host" CPU type: only valid when KVM is on. */
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}
/* Class init for the "host" CPU type: patch the generic PPC CPU class
 * with the real host's PVR, AltiVec/VSX/DFP support and cache sizes,
 * all queried from the host device tree / PVR register.
 */
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    /* kvmppc_get_* return (uint32_t)-1 when the property is unavailable */
    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
    dc->cannot_destroy_with_object_finalize_yet = true;
}
/* True when the host KVM supports the EPR (MPIC proxy) capability. */
bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}
/* True when the host KVM supports hash-table fd access (HPT migration). */
bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}
/* True when the host KVM can fix up endianness of guest hcalls. */
bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}
/* Walk up the QOM class hierarchy from a concrete CPU class to the first
 * abstract ancestor, which is the CPU "family" class. */
static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}
2368 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2370 uint32_t host_pvr = mfpvr();
2371 PowerPCCPUClass *pvr_pcc;
2373 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2374 if (pvr_pcc == NULL) {
2375 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2378 return pvr_pcc;
2381 static int kvm_ppc_register_host_cpu_type(void)
2383 TypeInfo type_info = {
2384 .name = TYPE_HOST_POWERPC_CPU,
2385 .instance_init = kvmppc_host_cpu_initfn,
2386 .class_init = kvmppc_host_cpu_class_init,
2388 PowerPCCPUClass *pvr_pcc;
2389 DeviceClass *dc;
2391 pvr_pcc = kvm_ppc_get_host_cpu_class();
2392 if (pvr_pcc == NULL) {
2393 return -1;
2395 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2396 type_register(&type_info);
2398 /* Register generic family CPU class for a family */
2399 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2400 dc = DEVICE_CLASS(pvr_pcc);
2401 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2402 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2403 type_register(&type_info);
2405 #if defined(TARGET_PPC64)
2406 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2407 type_info.parent = TYPE_SPAPR_CPU_CORE,
2408 type_info.instance_size = sizeof(sPAPRCPUCore);
2409 type_info.instance_init = NULL;
2410 type_info.class_init = spapr_cpu_core_class_init;
2411 type_info.class_data = (void *) "host";
2412 type_register(&type_info);
2413 g_free((void *)type_info.name);
2415 /* Register generic spapr CPU family class for current host CPU type */
2416 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2417 type_info.class_data = (void *) dc->desc;
2418 type_register(&type_info);
2419 g_free((void *)type_info.name);
2420 #endif
2422 return 0;
2425 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2427 struct kvm_rtas_token_args args = {
2428 .token = token,
2431 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2432 return -ENOENT;
2435 strncpy(args.name, function, sizeof(args.name));
2437 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
/* Open a file descriptor onto the guest hash page table for migration;
 * 'write' selects the restore direction. Returns the fd, or -1 when the
 * host KVM lacks the capability.
 */
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}
/*
 * Stream hash-table chunks from the KVM htab fd into the migration
 * stream f, reading up to bufsize bytes per read() and stopping after
 * max_ns nanoseconds (max_ns < 0 means no time limit).
 *
 * Returns 1 when the fd hit EOF (table fully saved), 0 when the time
 * slice expired first, or the negative read() result on error.
 */
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];   /* VLA sized by the caller's bufsize */
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Each read() returns a sequence of kvm_get_htab_header
             * records, each followed by n_valid HPTEs; walk them and
             * re-emit header fields plus payload into the stream. */
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
        /* Keep going until EOF (rc == 0) or the time budget is spent. */
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    /* 1 = done (EOF reached), 0 = more to do on the next call. */
    return (rc == 0) ? 1 : 0;
}
2493 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2494 uint16_t n_valid, uint16_t n_invalid)
2496 struct kvm_get_htab_header *buf;
2497 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2498 ssize_t rc;
2500 buf = alloca(chunksize);
2501 buf->index = index;
2502 buf->n_valid = n_valid;
2503 buf->n_invalid = n_invalid;
2505 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2507 rc = write(fd, buf, chunksize);
2508 if (rc < 0) {
2509 fprintf(stderr, "Error writing KVM hash table: %s\n",
2510 strerror(errno));
2511 return rc;
2513 if (rc != chunksize) {
2514 /* We should never get a short write on a single chunk */
2515 fprintf(stderr, "Short write, restoring KVM hash table\n");
2516 return -1;
2518 return 0;
/* Always ask the common KVM code to stop the guest on emulation errors. */
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}
/* SIGBUS on a vCPU: always returns 1 — presumably "not handled here",
 * leaving it to generic code; confirm against kvm-all.c's caller. */
int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}
/* Process-wide SIGBUS: same convention as the vCPU variant above —
 * returns 1, i.e. no arch-specific handling on ppc. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
/* No arch-specific IRQ routing setup is needed on ppc. */
void kvm_arch_init_irq_routing(KVMState *s)
{
}
/* Buffer layout for reading/writing one HPTE group through the KVM
 * htab fd: the kernel's chunk header followed by the PTE words
 * (two target_ulongs per entry, HPTES_PER_GROUP entries). */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
/*
 * Read the HPTE group containing pte_index from the kernel.
 *
 * Returns a "token": the address of the hpte[] array inside a
 * heap-allocated kvm_get_htab_buf, cast to uint64_t.  Ownership passes
 * to the caller, who must release it with kvmppc_hash64_free_pteg().
 * Returns 0 on any failure.
 */
uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    /* Success: fd is no longer needed, buffer ownership goes to caller. */
    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}
2579 void kvmppc_hash64_free_pteg(uint64_t token)
2581 struct kvm_get_htab_buf *htab_buf;
2583 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2584 hpte);
2585 g_free(htab_buf);
2586 return;
/*
 * Write a single HPTE (pte0/pte1) at pte_index into the guest hash
 * table via the KVM htab fd.  Best effort: all failures are silently
 * ignored (the function returns void either way).
 */
void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
                             target_ulong pte0, target_ulong pte1)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf hpte_buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    /* One chunk containing exactly one valid entry at pte_index. */
    hpte_buf.header.n_valid = 1;
    hpte_buf.header.n_invalid = 0;
    hpte_buf.header.index = pte_index;
    hpte_buf.hpte[0] = pte0;
    hpte_buf.hpte[1] = pte1;
    /*
     * Write the hpte entry.
     * CAUTION: write() has the warn_unused_result attribute. Hence we
     * need to check the return value, even though we do nothing.
     */
    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
        goto out_close;     /* same path as success: error is ignored */
    }

out_close:
    close(htab_fd);
    return;

error_out:
    return;
}
/* No MSI route fix-up is needed on ppc; report success unconditionally. */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}
/* No arch work after adding an MSI route on ppc; always succeeds. */
int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}
/* No arch work after releasing a virq on ppc; always succeeds. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
/*
 * Derive the GSI from an MSI data word: only the low 16 bits carry it,
 * so mask off everything above them.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2647 int kvmppc_enable_hwrng(void)
2649 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2650 return -1;
2653 return kvmppc_enable_hcall(kvm_state, H_RANDOM);