Merge remote-tracking branch 'remotes/famz/tags/for-upstream' into staging
[qemu/ar7.git] / target-ppc / kvm.c
blob9c4834c4fc1ebdd03981c32e9604ae09e1799f0a
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
49 #endif
/* Uncomment the next line to have DPRINTF() write to stderr. */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Debugging disabled: DPRINTF() expands to an empty statement. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
64 KVM_CAP_LAST_INFO
/*
 * Cached KVM capability probe results.  All of them have static storage
 * and therefore start out as 0; kvm_arch_init() fills in most of them.
 */
static int cap_interrupt_unset;
static int cap_interrupt_level;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */

/* Read from KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu(). */
static uint32_t debug_inst_opcode;
87 /* XXX We have a race condition where we actually have a level triggered
88 * interrupt, but the infrastructure can't expose that yet, so the guest
89 * takes but ignores it, goes to sleep and never gets notified that there's
90 * still an interrupt pending.
92 * As a quick workaround, let's just wake up again 20 ms after we injected
93 * an interrupt. That way we can assure that we're always reinjecting
94 * interrupts in case the guest swallowed them.
96 static QEMUTimer *idle_timer;
98 static void kvm_kick_cpu(void *opaque)
100 PowerPCCPU *cpu = opaque;
102 qemu_cpu_kick(CPU(cpu));
105 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
106 * should only be used for fallback tests - generally we should use
107 * explicit capabilities for the features we want, rather than
108 * assuming what is/isn't available depending on the KVM variant. */
109 static bool kvmppc_is_pr(KVMState *ks)
111 /* Assume KVM-PR if the GET_PVINFO capability is available */
112 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
115 static int kvm_ppc_register_host_cpu_type(void);
117 int kvm_arch_init(MachineState *ms, KVMState *s)
119 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
120 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
121 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
122 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
123 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
124 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
125 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
126 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
127 cap_spapr_vfio = false;
128 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
129 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
130 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
131 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
132 /* Note: we don't set cap_papr here, because this capability is
133 * only activated after this by kvmppc_set_papr() */
134 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
135 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
136 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
138 if (!cap_interrupt_level) {
139 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
140 "VM to stall at times!\n");
143 kvm_ppc_register_host_cpu_type();
145 return 0;
148 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
150 CPUPPCState *cenv = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_sregs sregs;
153 int ret;
155 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
156 /* What we're really trying to say is "if we're on BookE, we use
157 the native PVR for now". This is the only sane way to check
158 it though, so we potentially confuse users that they can run
159 BookE guests on BookS. Let's hope nobody dares enough :) */
160 return 0;
161 } else {
162 if (!cap_segstate) {
163 fprintf(stderr, "kvm error: missing PVR setting capability\n");
164 return -ENOSYS;
168 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
169 if (ret) {
170 return ret;
173 sregs.pvr = cenv->spr[SPR_PVR];
174 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
177 /* Set up a shared TLB array with KVM */
178 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
180 CPUPPCState *env = &cpu->env;
181 CPUState *cs = CPU(cpu);
182 struct kvm_book3e_206_tlb_params params = {};
183 struct kvm_config_tlb cfg = {};
184 unsigned int entries = 0;
185 int ret, i;
187 if (!kvm_enabled() ||
188 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
189 return 0;
192 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
194 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
195 params.tlb_sizes[i] = booke206_tlb_size(env, i);
196 params.tlb_ways[i] = booke206_tlb_ways(env, i);
197 entries += params.tlb_sizes[i];
200 assert(entries == env->nb_tlb);
201 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
203 env->tlb_dirty = true;
205 cfg.array = (uintptr_t)env->tlb.tlbm;
206 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
207 cfg.params = (uintptr_t)&params;
208 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
210 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
211 if (ret < 0) {
212 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
213 __func__, strerror(-ret));
214 return ret;
217 env->kvm_sw_tlb = true;
218 return 0;
222 #if defined(TARGET_PPC64)
223 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
224 struct kvm_ppc_smmu_info *info)
226 CPUPPCState *env = &cpu->env;
227 CPUState *cs = CPU(cpu);
229 memset(info, 0, sizeof(*info));
231 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
232 * need to "guess" what the supported page sizes are.
234 * For that to work we make a few assumptions:
236 * - Check whether we are running "PR" KVM which only supports 4K
237 * and 16M pages, but supports them regardless of the backing
238 * store characteritics. We also don't support 1T segments.
240 * This is safe as if HV KVM ever supports that capability or PR
241 * KVM grows supports for more page/segment sizes, those versions
242 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
243 * will not hit this fallback
245 * - Else we are running HV KVM. This means we only support page
246 * sizes that fit in the backing store. Additionally we only
247 * advertize 64K pages if the processor is ARCH 2.06 and we assume
248 * P7 encodings for the SLB and hash table. Here too, we assume
249 * support for any newer processor will mean a kernel that
250 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
251 * this fallback.
253 if (kvmppc_is_pr(cs->kvm_state)) {
254 /* No flags */
255 info->flags = 0;
256 info->slb_size = 64;
258 /* Standard 4k base page size segment */
259 info->sps[0].page_shift = 12;
260 info->sps[0].slb_enc = 0;
261 info->sps[0].enc[0].page_shift = 12;
262 info->sps[0].enc[0].pte_enc = 0;
264 /* Standard 16M large page size segment */
265 info->sps[1].page_shift = 24;
266 info->sps[1].slb_enc = SLB_VSID_L;
267 info->sps[1].enc[0].page_shift = 24;
268 info->sps[1].enc[0].pte_enc = 0;
269 } else {
270 int i = 0;
272 /* HV KVM has backing store size restrictions */
273 info->flags = KVM_PPC_PAGE_SIZES_REAL;
275 if (env->mmu_model & POWERPC_MMU_1TSEG) {
276 info->flags |= KVM_PPC_1T_SEGMENTS;
279 if (env->mmu_model == POWERPC_MMU_2_06 ||
280 env->mmu_model == POWERPC_MMU_2_07) {
281 info->slb_size = 32;
282 } else {
283 info->slb_size = 64;
286 /* Standard 4k base page size segment */
287 info->sps[i].page_shift = 12;
288 info->sps[i].slb_enc = 0;
289 info->sps[i].enc[0].page_shift = 12;
290 info->sps[i].enc[0].pte_enc = 0;
291 i++;
293 /* 64K on MMU 2.06 and later */
294 if (env->mmu_model == POWERPC_MMU_2_06 ||
295 env->mmu_model == POWERPC_MMU_2_07) {
296 info->sps[i].page_shift = 16;
297 info->sps[i].slb_enc = 0x110;
298 info->sps[i].enc[0].page_shift = 16;
299 info->sps[i].enc[0].pte_enc = 1;
300 i++;
303 /* Standard 16M large page size segment */
304 info->sps[i].page_shift = 24;
305 info->sps[i].slb_enc = SLB_VSID_L;
306 info->sps[i].enc[0].page_shift = 24;
307 info->sps[i].enc[0].pte_enc = 0;
311 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
313 CPUState *cs = CPU(cpu);
314 int ret;
316 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
317 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
318 if (ret == 0) {
319 return;
323 kvm_get_fallback_smmu_info(cpu, info);
/*
 * Return the page size backing @mem_path: the filesystem block size when
 * the path is on hugetlbfs, the normal page size otherwise.
 *
 * Exits the process with status 1 if statfs() fails for a reason other
 * than EINTR.
 */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int rc;

    /* Retry for as long as the call is merely interrupted. */
    for (;;) {
        rc = statfs(mem_path, &fs);
        if (rc == 0 || errno != EINTR) {
            break;
        }
    }

    if (rc != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type == HUGETLBFS_MAGIC) {
        /* It's hugepage, return the huge page size */
        return fs.f_bsize;
    }

    /* Explicit mempath, but it's ordinary pages */
    return getpagesize();
}
353 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
354 * may or may not name the same files / on the same filesystem now as
355 * when we actually open and map them. Iterate over the file
356 * descriptors instead, and use qemu_fd_getpagesize().
358 static int find_max_supported_pagesize(Object *obj, void *opaque)
360 char *mem_path;
361 long *hpsize_min = opaque;
363 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
364 mem_path = object_property_get_str(obj, "mem-path", NULL);
365 if (mem_path) {
366 long hpsize = gethugepagesize(mem_path);
367 if (hpsize < *hpsize_min) {
368 *hpsize_min = hpsize;
370 } else {
371 *hpsize_min = getpagesize();
375 return 0;
378 static long getrampagesize(void)
380 long hpsize = LONG_MAX;
381 long mainrampagesize;
382 Object *memdev_root;
384 if (mem_path) {
385 mainrampagesize = gethugepagesize(mem_path);
386 } else {
387 mainrampagesize = getpagesize();
390 /* it's possible we have memory-backend objects with
391 * hugepage-backed RAM. these may get mapped into system
392 * address space via -numa parameters or memory hotplug
393 * hooks. we want to take these into account, but we
394 * also want to make sure these supported hugepage
395 * sizes are applicable across the entire range of memory
396 * we may boot from, so we take the min across all
397 * backends, and assume normal pages in cases where a
398 * backend isn't backed by hugepages.
400 memdev_root = object_resolve_path("/objects", NULL);
401 if (memdev_root) {
402 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
404 if (hpsize == LONG_MAX) {
405 /* No additional memory regions found ==> Report main RAM page size */
406 return mainrampagesize;
409 /* If NUMA is disabled or the NUMA nodes are not backed with a
410 * memory-backend, then there is at least one node using "normal" RAM,
411 * so if its page size is smaller we have got to report that size instead.
413 if (hpsize > mainrampagesize &&
414 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
415 static bool warned;
416 if (!warned) {
417 error_report("Huge page support disabled (n/a for main memory).");
418 warned = true;
420 return mainrampagesize;
423 return hpsize;
426 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
428 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
429 return true;
432 return (1ul << shift) <= rampgsize;
435 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
437 static struct kvm_ppc_smmu_info smmu_info;
438 static bool has_smmu_info;
439 CPUPPCState *env = &cpu->env;
440 long rampagesize;
441 int iq, ik, jq, jk;
442 bool has_64k_pages = false;
444 /* We only handle page sizes for 64-bit server guests for now */
445 if (!(env->mmu_model & POWERPC_MMU_64)) {
446 return;
449 /* Collect MMU info from kernel if not already */
450 if (!has_smmu_info) {
451 kvm_get_smmu_info(cpu, &smmu_info);
452 has_smmu_info = true;
455 rampagesize = getrampagesize();
457 /* Convert to QEMU form */
458 memset(&env->sps, 0, sizeof(env->sps));
460 /* If we have HV KVM, we need to forbid CI large pages if our
461 * host page size is smaller than 64K.
463 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
464 env->ci_large_pages = getpagesize() >= 0x10000;
468 * XXX This loop should be an entry wide AND of the capabilities that
469 * the selected CPU has with the capabilities that KVM supports.
471 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
472 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
473 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
475 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
476 ksps->page_shift)) {
477 continue;
479 qsps->page_shift = ksps->page_shift;
480 qsps->slb_enc = ksps->slb_enc;
481 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
482 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
483 ksps->enc[jk].page_shift)) {
484 continue;
486 if (ksps->enc[jk].page_shift == 16) {
487 has_64k_pages = true;
489 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
490 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
491 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
492 break;
495 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
496 break;
499 env->slb_nr = smmu_info.slb_size;
500 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
501 env->mmu_model &= ~POWERPC_MMU_1TSEG;
503 if (!has_64k_pages) {
504 env->mmu_model &= ~POWERPC_MMU_64K;
507 #else /* defined (TARGET_PPC64) */
509 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
513 #endif /* !defined (TARGET_PPC64) */
515 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
517 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
520 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
521 * book3s supports only 1 watchpoint, so array size
522 * of 4 is sufficient for now.
524 #define MAX_HW_BKPTS 4
526 static struct HWBreakpoint {
527 target_ulong addr;
528 int type;
529 } hw_debug_points[MAX_HW_BKPTS];
531 static CPUWatchpoint hw_watchpoint;
533 /* Default there is no breakpoint and watchpoint supported */
534 static int max_hw_breakpoint;
535 static int max_hw_watchpoint;
536 static int nb_hw_breakpoint;
537 static int nb_hw_watchpoint;
539 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
541 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
542 max_hw_breakpoint = 2;
543 max_hw_watchpoint = 2;
546 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
547 fprintf(stderr, "Error initializing h/w breakpoints\n");
548 return;
552 int kvm_arch_init_vcpu(CPUState *cs)
554 PowerPCCPU *cpu = POWERPC_CPU(cs);
555 CPUPPCState *cenv = &cpu->env;
556 int ret;
558 /* Gather server mmu info from KVM and update the CPU state */
559 kvm_fixup_page_sizes(cpu);
561 /* Synchronize sregs with kvm */
562 ret = kvm_arch_sync_sregs(cpu);
563 if (ret) {
564 if (ret == -EINVAL) {
565 error_report("Register sync failed... If you're using kvm-hv.ko,"
566 " only \"-cpu host\" is possible");
568 return ret;
571 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
573 switch (cenv->mmu_model) {
574 case POWERPC_MMU_BOOKE206:
575 /* This target supports access to KVM's guest TLB */
576 ret = kvm_booke206_tlb_init(cpu);
577 break;
578 case POWERPC_MMU_2_07:
579 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
580 /* KVM-HV has transactional memory on POWER8 also without the
581 * KVM_CAP_PPC_HTM extension, so enable it here instead. */
582 cap_htm = true;
584 break;
585 default:
586 break;
589 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
590 kvmppc_hw_debug_points_init(cenv);
592 return ret;
595 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
597 CPUPPCState *env = &cpu->env;
598 CPUState *cs = CPU(cpu);
599 struct kvm_dirty_tlb dirty_tlb;
600 unsigned char *bitmap;
601 int ret;
603 if (!env->kvm_sw_tlb) {
604 return;
607 bitmap = g_malloc((env->nb_tlb + 7) / 8);
608 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
610 dirty_tlb.bitmap = (uintptr_t)bitmap;
611 dirty_tlb.num_dirty = env->nb_tlb;
613 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
614 if (ret) {
615 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
616 __func__, strerror(-ret));
619 g_free(bitmap);
622 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
624 PowerPCCPU *cpu = POWERPC_CPU(cs);
625 CPUPPCState *env = &cpu->env;
626 union {
627 uint32_t u32;
628 uint64_t u64;
629 } val;
630 struct kvm_one_reg reg = {
631 .id = id,
632 .addr = (uintptr_t) &val,
634 int ret;
636 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
637 if (ret != 0) {
638 trace_kvm_failed_spr_get(spr, strerror(errno));
639 } else {
640 switch (id & KVM_REG_SIZE_MASK) {
641 case KVM_REG_SIZE_U32:
642 env->spr[spr] = val.u32;
643 break;
645 case KVM_REG_SIZE_U64:
646 env->spr[spr] = val.u64;
647 break;
649 default:
650 /* Don't handle this size yet */
651 abort();
656 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
658 PowerPCCPU *cpu = POWERPC_CPU(cs);
659 CPUPPCState *env = &cpu->env;
660 union {
661 uint32_t u32;
662 uint64_t u64;
663 } val;
664 struct kvm_one_reg reg = {
665 .id = id,
666 .addr = (uintptr_t) &val,
668 int ret;
670 switch (id & KVM_REG_SIZE_MASK) {
671 case KVM_REG_SIZE_U32:
672 val.u32 = env->spr[spr];
673 break;
675 case KVM_REG_SIZE_U64:
676 val.u64 = env->spr[spr];
677 break;
679 default:
680 /* Don't handle this size yet */
681 abort();
684 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
685 if (ret != 0) {
686 trace_kvm_failed_spr_set(spr, strerror(errno));
690 static int kvm_put_fp(CPUState *cs)
692 PowerPCCPU *cpu = POWERPC_CPU(cs);
693 CPUPPCState *env = &cpu->env;
694 struct kvm_one_reg reg;
695 int i;
696 int ret;
698 if (env->insns_flags & PPC_FLOAT) {
699 uint64_t fpscr = env->fpscr;
700 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
702 reg.id = KVM_REG_PPC_FPSCR;
703 reg.addr = (uintptr_t)&fpscr;
704 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
705 if (ret < 0) {
706 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
707 return ret;
710 for (i = 0; i < 32; i++) {
711 uint64_t vsr[2];
713 #ifdef HOST_WORDS_BIGENDIAN
714 vsr[0] = float64_val(env->fpr[i]);
715 vsr[1] = env->vsr[i];
716 #else
717 vsr[0] = env->vsr[i];
718 vsr[1] = float64_val(env->fpr[i]);
719 #endif
720 reg.addr = (uintptr_t) &vsr;
721 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
723 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
724 if (ret < 0) {
725 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
726 i, strerror(errno));
727 return ret;
732 if (env->insns_flags & PPC_ALTIVEC) {
733 reg.id = KVM_REG_PPC_VSCR;
734 reg.addr = (uintptr_t)&env->vscr;
735 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
736 if (ret < 0) {
737 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
738 return ret;
741 for (i = 0; i < 32; i++) {
742 reg.id = KVM_REG_PPC_VR(i);
743 reg.addr = (uintptr_t)&env->avr[i];
744 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745 if (ret < 0) {
746 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
747 return ret;
752 return 0;
755 static int kvm_get_fp(CPUState *cs)
757 PowerPCCPU *cpu = POWERPC_CPU(cs);
758 CPUPPCState *env = &cpu->env;
759 struct kvm_one_reg reg;
760 int i;
761 int ret;
763 if (env->insns_flags & PPC_FLOAT) {
764 uint64_t fpscr;
765 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
767 reg.id = KVM_REG_PPC_FPSCR;
768 reg.addr = (uintptr_t)&fpscr;
769 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770 if (ret < 0) {
771 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
772 return ret;
773 } else {
774 env->fpscr = fpscr;
777 for (i = 0; i < 32; i++) {
778 uint64_t vsr[2];
780 reg.addr = (uintptr_t) &vsr;
781 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
783 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784 if (ret < 0) {
785 DPRINTF("Unable to get %s%d from KVM: %s\n",
786 vsx ? "VSR" : "FPR", i, strerror(errno));
787 return ret;
788 } else {
789 #ifdef HOST_WORDS_BIGENDIAN
790 env->fpr[i] = vsr[0];
791 if (vsx) {
792 env->vsr[i] = vsr[1];
794 #else
795 env->fpr[i] = vsr[1];
796 if (vsx) {
797 env->vsr[i] = vsr[0];
799 #endif
804 if (env->insns_flags & PPC_ALTIVEC) {
805 reg.id = KVM_REG_PPC_VSCR;
806 reg.addr = (uintptr_t)&env->vscr;
807 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
808 if (ret < 0) {
809 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
810 return ret;
813 for (i = 0; i < 32; i++) {
814 reg.id = KVM_REG_PPC_VR(i);
815 reg.addr = (uintptr_t)&env->avr[i];
816 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
817 if (ret < 0) {
818 DPRINTF("Unable to get VR%d from KVM: %s\n",
819 i, strerror(errno));
820 return ret;
825 return 0;
828 #if defined(TARGET_PPC64)
829 static int kvm_get_vpa(CPUState *cs)
831 PowerPCCPU *cpu = POWERPC_CPU(cs);
832 CPUPPCState *env = &cpu->env;
833 struct kvm_one_reg reg;
834 int ret;
836 reg.id = KVM_REG_PPC_VPA_ADDR;
837 reg.addr = (uintptr_t)&env->vpa_addr;
838 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
839 if (ret < 0) {
840 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
841 return ret;
844 assert((uintptr_t)&env->slb_shadow_size
845 == ((uintptr_t)&env->slb_shadow_addr + 8));
846 reg.id = KVM_REG_PPC_VPA_SLB;
847 reg.addr = (uintptr_t)&env->slb_shadow_addr;
848 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
849 if (ret < 0) {
850 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
851 strerror(errno));
852 return ret;
855 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
856 reg.id = KVM_REG_PPC_VPA_DTL;
857 reg.addr = (uintptr_t)&env->dtl_addr;
858 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
859 if (ret < 0) {
860 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
861 strerror(errno));
862 return ret;
865 return 0;
868 static int kvm_put_vpa(CPUState *cs)
870 PowerPCCPU *cpu = POWERPC_CPU(cs);
871 CPUPPCState *env = &cpu->env;
872 struct kvm_one_reg reg;
873 int ret;
875 /* SLB shadow or DTL can't be registered unless a master VPA is
876 * registered. That means when restoring state, if a VPA *is*
877 * registered, we need to set that up first. If not, we need to
878 * deregister the others before deregistering the master VPA */
879 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
881 if (env->vpa_addr) {
882 reg.id = KVM_REG_PPC_VPA_ADDR;
883 reg.addr = (uintptr_t)&env->vpa_addr;
884 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
885 if (ret < 0) {
886 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
887 return ret;
891 assert((uintptr_t)&env->slb_shadow_size
892 == ((uintptr_t)&env->slb_shadow_addr + 8));
893 reg.id = KVM_REG_PPC_VPA_SLB;
894 reg.addr = (uintptr_t)&env->slb_shadow_addr;
895 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
896 if (ret < 0) {
897 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
898 return ret;
901 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
902 reg.id = KVM_REG_PPC_VPA_DTL;
903 reg.addr = (uintptr_t)&env->dtl_addr;
904 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
905 if (ret < 0) {
906 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
907 strerror(errno));
908 return ret;
911 if (!env->vpa_addr) {
912 reg.id = KVM_REG_PPC_VPA_ADDR;
913 reg.addr = (uintptr_t)&env->vpa_addr;
914 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
915 if (ret < 0) {
916 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
917 return ret;
921 return 0;
923 #endif /* TARGET_PPC64 */
925 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
927 CPUPPCState *env = &cpu->env;
928 struct kvm_sregs sregs;
929 int i;
931 sregs.pvr = env->spr[SPR_PVR];
933 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
935 /* Sync SLB */
936 #ifdef TARGET_PPC64
937 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
938 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
939 if (env->slb[i].esid & SLB_ESID_V) {
940 sregs.u.s.ppc64.slb[i].slbe |= i;
942 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
944 #endif
946 /* Sync SRs */
947 for (i = 0; i < 16; i++) {
948 sregs.u.s.ppc32.sr[i] = env->sr[i];
951 /* Sync BATs */
952 for (i = 0; i < 8; i++) {
953 /* Beware. We have to swap upper and lower bits here */
954 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
955 | env->DBAT[1][i];
956 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
957 | env->IBAT[1][i];
960 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
963 int kvm_arch_put_registers(CPUState *cs, int level)
965 PowerPCCPU *cpu = POWERPC_CPU(cs);
966 CPUPPCState *env = &cpu->env;
967 struct kvm_regs regs;
968 int ret;
969 int i;
971 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
972 if (ret < 0) {
973 return ret;
976 regs.ctr = env->ctr;
977 regs.lr = env->lr;
978 regs.xer = cpu_read_xer(env);
979 regs.msr = env->msr;
980 regs.pc = env->nip;
982 regs.srr0 = env->spr[SPR_SRR0];
983 regs.srr1 = env->spr[SPR_SRR1];
985 regs.sprg0 = env->spr[SPR_SPRG0];
986 regs.sprg1 = env->spr[SPR_SPRG1];
987 regs.sprg2 = env->spr[SPR_SPRG2];
988 regs.sprg3 = env->spr[SPR_SPRG3];
989 regs.sprg4 = env->spr[SPR_SPRG4];
990 regs.sprg5 = env->spr[SPR_SPRG5];
991 regs.sprg6 = env->spr[SPR_SPRG6];
992 regs.sprg7 = env->spr[SPR_SPRG7];
994 regs.pid = env->spr[SPR_BOOKE_PID];
996 for (i = 0;i < 32; i++)
997 regs.gpr[i] = env->gpr[i];
999 regs.cr = 0;
1000 for (i = 0; i < 8; i++) {
1001 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1004 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1005 if (ret < 0)
1006 return ret;
1008 kvm_put_fp(cs);
1010 if (env->tlb_dirty) {
1011 kvm_sw_tlb_put(cpu);
1012 env->tlb_dirty = false;
1015 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1016 ret = kvmppc_put_books_sregs(cpu);
1017 if (ret < 0) {
1018 return ret;
1022 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1023 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1026 if (cap_one_reg) {
1027 int i;
1029 /* We deliberately ignore errors here, for kernels which have
1030 * the ONE_REG calls, but don't support the specific
1031 * registers, there's a reasonable chance things will still
1032 * work, at least until we try to migrate. */
1033 for (i = 0; i < 1024; i++) {
1034 uint64_t id = env->spr_cb[i].one_reg_id;
1036 if (id != 0) {
1037 kvm_put_one_spr(cs, id, i);
1041 #ifdef TARGET_PPC64
1042 if (msr_ts) {
1043 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1044 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1046 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1047 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1049 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1050 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1051 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1052 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1054 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1061 if (cap_papr) {
1062 if (kvm_put_vpa(cs) < 0) {
1063 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1067 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1068 #endif /* TARGET_PPC64 */
1071 return ret;
1074 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1076 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1079 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1081 CPUPPCState *env = &cpu->env;
1082 struct kvm_sregs sregs;
1083 int ret;
1085 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1086 if (ret < 0) {
1087 return ret;
1090 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1091 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1092 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1093 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1094 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1095 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1096 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1097 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1098 env->spr[SPR_DECR] = sregs.u.e.dec;
1099 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1100 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1101 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1104 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1105 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1106 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1107 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1108 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1109 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1112 if (sregs.u.e.features & KVM_SREGS_E_64) {
1113 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1116 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1117 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1120 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1121 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1122 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1123 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1124 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1125 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1126 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1127 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1128 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1129 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1130 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1131 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1132 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1133 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1134 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1135 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1136 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1137 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1138 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1139 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1140 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1141 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1142 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1143 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1144 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1145 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1146 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1147 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1148 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1149 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1150 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1151 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1152 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1154 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1155 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1156 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1157 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1158 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1159 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1160 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1163 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1164 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1165 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1168 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1169 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1170 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1171 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1172 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1176 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1177 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1178 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1179 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1180 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1181 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1182 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1183 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1184 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1185 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1186 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1189 if (sregs.u.e.features & KVM_SREGS_EXP) {
1190 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1193 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1194 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1195 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1198 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1199 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1200 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1201 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1203 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1204 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1205 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1209 return 0;
1212 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1214 CPUPPCState *env = &cpu->env;
1215 struct kvm_sregs sregs;
1216 int ret;
1217 int i;
1219 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1220 if (ret < 0) {
1221 return ret;
1224 if (!env->external_htab) {
1225 ppc_store_sdr1(env, sregs.u.s.sdr1);
1228 /* Sync SLB */
1229 #ifdef TARGET_PPC64
1231 * The packed SLB array we get from KVM_GET_SREGS only contains
1232 * information about valid entries. So we flush our internal copy
1233 * to get rid of stale ones, then put all valid SLB entries back
1234 * in.
1236 memset(env->slb, 0, sizeof(env->slb));
1237 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1238 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1239 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1241 * Only restore valid entries
1243 if (rb & SLB_ESID_V) {
1244 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1247 #endif
1249 /* Sync SRs */
1250 for (i = 0; i < 16; i++) {
1251 env->sr[i] = sregs.u.s.ppc32.sr[i];
1254 /* Sync BATs */
1255 for (i = 0; i < 8; i++) {
1256 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1257 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1258 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1259 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1262 return 0;
1265 int kvm_arch_get_registers(CPUState *cs)
1267 PowerPCCPU *cpu = POWERPC_CPU(cs);
1268 CPUPPCState *env = &cpu->env;
1269 struct kvm_regs regs;
1270 uint32_t cr;
1271 int i, ret;
1273 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1274 if (ret < 0)
1275 return ret;
1277 cr = regs.cr;
1278 for (i = 7; i >= 0; i--) {
1279 env->crf[i] = cr & 15;
1280 cr >>= 4;
1283 env->ctr = regs.ctr;
1284 env->lr = regs.lr;
1285 cpu_write_xer(env, regs.xer);
1286 env->msr = regs.msr;
1287 env->nip = regs.pc;
1289 env->spr[SPR_SRR0] = regs.srr0;
1290 env->spr[SPR_SRR1] = regs.srr1;
1292 env->spr[SPR_SPRG0] = regs.sprg0;
1293 env->spr[SPR_SPRG1] = regs.sprg1;
1294 env->spr[SPR_SPRG2] = regs.sprg2;
1295 env->spr[SPR_SPRG3] = regs.sprg3;
1296 env->spr[SPR_SPRG4] = regs.sprg4;
1297 env->spr[SPR_SPRG5] = regs.sprg5;
1298 env->spr[SPR_SPRG6] = regs.sprg6;
1299 env->spr[SPR_SPRG7] = regs.sprg7;
1301 env->spr[SPR_BOOKE_PID] = regs.pid;
1303 for (i = 0;i < 32; i++)
1304 env->gpr[i] = regs.gpr[i];
1306 kvm_get_fp(cs);
1308 if (cap_booke_sregs) {
1309 ret = kvmppc_get_booke_sregs(cpu);
1310 if (ret < 0) {
1311 return ret;
1315 if (cap_segstate) {
1316 ret = kvmppc_get_books_sregs(cpu);
1317 if (ret < 0) {
1318 return ret;
1322 if (cap_hior) {
1323 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1326 if (cap_one_reg) {
1327 int i;
1329 /* We deliberately ignore errors here, for kernels which have
1330 * the ONE_REG calls, but don't support the specific
1331 * registers, there's a reasonable chance things will still
1332 * work, at least until we try to migrate. */
1333 for (i = 0; i < 1024; i++) {
1334 uint64_t id = env->spr_cb[i].one_reg_id;
1336 if (id != 0) {
1337 kvm_get_one_spr(cs, id, i);
1341 #ifdef TARGET_PPC64
1342 if (msr_ts) {
1343 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1344 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1346 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1347 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1349 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1350 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1351 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1354 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1361 if (cap_papr) {
1362 if (kvm_get_vpa(cs) < 0) {
1363 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1367 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1368 #endif
1371 return 0;
1374 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1376 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1378 if (irq != PPC_INTERRUPT_EXT) {
1379 return 0;
1382 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1383 return 0;
1386 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1388 return 0;
1391 #if defined(TARGET_PPCEMB)
1392 #define PPC_INPUT_INT PPC40x_INPUT_INT
1393 #elif defined(TARGET_PPC64)
1394 #define PPC_INPUT_INT PPC970_INPUT_INT
1395 #else
1396 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1397 #endif
1399 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1401 PowerPCCPU *cpu = POWERPC_CPU(cs);
1402 CPUPPCState *env = &cpu->env;
1403 int r;
1404 unsigned irq;
1406 qemu_mutex_lock_iothread();
1408 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1409 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1410 if (!cap_interrupt_level &&
1411 run->ready_for_interrupt_injection &&
1412 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1413 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1415 /* For now KVM disregards the 'irq' argument. However, in the
1416 * future KVM could cache it in-kernel to avoid a heavyweight exit
1417 * when reading the UIC.
1419 irq = KVM_INTERRUPT_SET;
1421 DPRINTF("injected interrupt %d\n", irq);
1422 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1423 if (r < 0) {
1424 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1427 /* Always wake up soon in case the interrupt was level based */
1428 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1429 (NANOSECONDS_PER_SECOND / 50));
1432 /* We don't know if there are more interrupts pending after this. However,
1433 * the guest will return to userspace in the course of handling this one
1434 * anyways, so we will get a chance to deliver the rest. */
1436 qemu_mutex_unlock_iothread();
1439 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1441 return MEMTXATTRS_UNSPECIFIED;
1444 int kvm_arch_process_async_events(CPUState *cs)
1446 return cs->halted;
1449 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1451 CPUState *cs = CPU(cpu);
1452 CPUPPCState *env = &cpu->env;
1454 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1455 cs->halted = 1;
1456 cs->exception_index = EXCP_HLT;
1459 return 0;
1462 /* map dcr access to existing qemu dcr emulation */
1463 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1465 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1466 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1468 return 0;
1471 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1473 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1474 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1476 return 0;
1479 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1481 /* Mixed endian case is not handled */
1482 uint32_t sc = debug_inst_opcode;
1484 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1485 sizeof(sc), 0) ||
1486 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1487 return -EINVAL;
1490 return 0;
1493 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1495 uint32_t sc;
1497 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1498 sc != debug_inst_opcode ||
1499 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1500 sizeof(sc), 1)) {
1501 return -EINVAL;
1504 return 0;
1507 static int find_hw_breakpoint(target_ulong addr, int type)
1509 int n;
1511 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1512 <= ARRAY_SIZE(hw_debug_points));
1514 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1515 if (hw_debug_points[n].addr == addr &&
1516 hw_debug_points[n].type == type) {
1517 return n;
1521 return -1;
1524 static int find_hw_watchpoint(target_ulong addr, int *flag)
1526 int n;
1528 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1529 if (n >= 0) {
1530 *flag = BP_MEM_ACCESS;
1531 return n;
1534 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1535 if (n >= 0) {
1536 *flag = BP_MEM_WRITE;
1537 return n;
1540 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1541 if (n >= 0) {
1542 *flag = BP_MEM_READ;
1543 return n;
1546 return -1;
1549 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1550 target_ulong len, int type)
1552 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1553 return -ENOBUFS;
1556 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1557 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1559 switch (type) {
1560 case GDB_BREAKPOINT_HW:
1561 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1562 return -ENOBUFS;
1565 if (find_hw_breakpoint(addr, type) >= 0) {
1566 return -EEXIST;
1569 nb_hw_breakpoint++;
1570 break;
1572 case GDB_WATCHPOINT_WRITE:
1573 case GDB_WATCHPOINT_READ:
1574 case GDB_WATCHPOINT_ACCESS:
1575 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1576 return -ENOBUFS;
1579 if (find_hw_breakpoint(addr, type) >= 0) {
1580 return -EEXIST;
1583 nb_hw_watchpoint++;
1584 break;
1586 default:
1587 return -ENOSYS;
1590 return 0;
1593 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1594 target_ulong len, int type)
1596 int n;
1598 n = find_hw_breakpoint(addr, type);
1599 if (n < 0) {
1600 return -ENOENT;
1603 switch (type) {
1604 case GDB_BREAKPOINT_HW:
1605 nb_hw_breakpoint--;
1606 break;
1608 case GDB_WATCHPOINT_WRITE:
1609 case GDB_WATCHPOINT_READ:
1610 case GDB_WATCHPOINT_ACCESS:
1611 nb_hw_watchpoint--;
1612 break;
1614 default:
1615 return -ENOSYS;
1617 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1619 return 0;
1622 void kvm_arch_remove_all_hw_breakpoints(void)
1624 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1627 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1629 int n;
1631 /* Software Breakpoint updates */
1632 if (kvm_sw_breakpoints_active(cs)) {
1633 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1636 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1637 <= ARRAY_SIZE(hw_debug_points));
1638 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1640 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1641 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1642 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1643 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1644 switch (hw_debug_points[n].type) {
1645 case GDB_BREAKPOINT_HW:
1646 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1647 break;
1648 case GDB_WATCHPOINT_WRITE:
1649 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1650 break;
1651 case GDB_WATCHPOINT_READ:
1652 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1653 break;
1654 case GDB_WATCHPOINT_ACCESS:
1655 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1656 KVMPPC_DEBUG_WATCH_READ;
1657 break;
1658 default:
1659 cpu_abort(cs, "Unsupported breakpoint type\n");
1661 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1666 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1668 CPUState *cs = CPU(cpu);
1669 CPUPPCState *env = &cpu->env;
1670 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1671 int handle = 0;
1672 int n;
1673 int flag = 0;
1675 if (cs->singlestep_enabled) {
1676 handle = 1;
1677 } else if (arch_info->status) {
1678 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1679 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1680 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1681 if (n >= 0) {
1682 handle = 1;
1684 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1685 KVMPPC_DEBUG_WATCH_WRITE)) {
1686 n = find_hw_watchpoint(arch_info->address, &flag);
1687 if (n >= 0) {
1688 handle = 1;
1689 cs->watchpoint_hit = &hw_watchpoint;
1690 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1691 hw_watchpoint.flags = flag;
1695 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1696 handle = 1;
1697 } else {
1698 /* QEMU is not able to handle debug exception, so inject
1699 * program exception to guest;
1700 * Yes program exception NOT debug exception !!
1701 * When QEMU is using debug resources then debug exception must
1702 * be always set. To achieve this we set MSR_DE and also set
1703 * MSRP_DEP so guest cannot change MSR_DE.
1704 * When emulating debug resource for guest we want guest
1705 * to control MSR_DE (enable/disable debug interrupt on need).
1706 * Supporting both configurations are NOT possible.
1707 * So the result is that we cannot share debug resources
1708 * between QEMU and Guest on BOOKE architecture.
1709 * In the current design QEMU gets the priority over guest,
1710 * this means that if QEMU is using debug resources then guest
1711 * cannot use them;
1712 * For software breakpoint QEMU uses a privileged instruction;
1713 * So there cannot be any reason that we are here for guest
1714 * set debug exception, only possibility is guest executed a
1715 * privileged / illegal instruction and that's why we are
1716 * injecting a program interrupt.
1719 cpu_synchronize_state(cs);
1720 /* env->nip is PC, so increment this by 4 to use
1721 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1723 env->nip += 4;
1724 cs->exception_index = POWERPC_EXCP_PROGRAM;
1725 env->error_code = POWERPC_EXCP_INVAL;
1726 ppc_cpu_do_interrupt(cs);
1729 return handle;
1732 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1734 PowerPCCPU *cpu = POWERPC_CPU(cs);
1735 CPUPPCState *env = &cpu->env;
1736 int ret;
1738 qemu_mutex_lock_iothread();
1740 switch (run->exit_reason) {
1741 case KVM_EXIT_DCR:
1742 if (run->dcr.is_write) {
1743 DPRINTF("handle dcr write\n");
1744 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1745 } else {
1746 DPRINTF("handle dcr read\n");
1747 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1749 break;
1750 case KVM_EXIT_HLT:
1751 DPRINTF("handle halt\n");
1752 ret = kvmppc_handle_halt(cpu);
1753 break;
1754 #if defined(TARGET_PPC64)
1755 case KVM_EXIT_PAPR_HCALL:
1756 DPRINTF("handle PAPR hypercall\n");
1757 run->papr_hcall.ret = spapr_hypercall(cpu,
1758 run->papr_hcall.nr,
1759 run->papr_hcall.args);
1760 ret = 0;
1761 break;
1762 #endif
1763 case KVM_EXIT_EPR:
1764 DPRINTF("handle epr\n");
1765 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1766 ret = 0;
1767 break;
1768 case KVM_EXIT_WATCHDOG:
1769 DPRINTF("handle watchdog expiry\n");
1770 watchdog_perform_action();
1771 ret = 0;
1772 break;
1774 case KVM_EXIT_DEBUG:
1775 DPRINTF("handle debug exception\n");
1776 if (kvm_handle_debug(cpu, run)) {
1777 ret = EXCP_DEBUG;
1778 break;
1780 /* re-enter, this exception was guest-internal */
1781 ret = 0;
1782 break;
1784 default:
1785 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1786 ret = -1;
1787 break;
1790 qemu_mutex_unlock_iothread();
1791 return ret;
1794 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1796 CPUState *cs = CPU(cpu);
1797 uint32_t bits = tsr_bits;
1798 struct kvm_one_reg reg = {
1799 .id = KVM_REG_PPC_OR_TSR,
1800 .addr = (uintptr_t) &bits,
1803 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1806 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1809 CPUState *cs = CPU(cpu);
1810 uint32_t bits = tsr_bits;
1811 struct kvm_one_reg reg = {
1812 .id = KVM_REG_PPC_CLEAR_TSR,
1813 .addr = (uintptr_t) &bits,
1816 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1819 int kvmppc_set_tcr(PowerPCCPU *cpu)
1821 CPUState *cs = CPU(cpu);
1822 CPUPPCState *env = &cpu->env;
1823 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1825 struct kvm_one_reg reg = {
1826 .id = KVM_REG_PPC_TCR,
1827 .addr = (uintptr_t) &tcr,
1830 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1833 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1835 CPUState *cs = CPU(cpu);
1836 int ret;
1838 if (!kvm_enabled()) {
1839 return -1;
1842 if (!cap_ppc_watchdog) {
1843 printf("warning: KVM does not support watchdog");
1844 return -1;
1847 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1848 if (ret < 0) {
1849 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1850 __func__, strerror(-ret));
1851 return ret;
1854 return ret;
/*
 * Find the first line of /proc/cpuinfo that starts with 'field' and copy
 * it (truncated to 'len' bytes by pstrcpy) into 'value'.
 *
 * Returns 0 when such a line was found, -1 when the file cannot be opened
 * or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int status = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (fp == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            status = 0;
            break;
        }
        if (*line == '\0') {
            /* An empty buffer also ends the scan */
            break;
        }
    }

    fclose(fp);

    return status;
}
1885 uint32_t kvmppc_get_tbfreq(void)
1887 char line[512];
1888 char *ns;
1889 uint32_t retval = NANOSECONDS_PER_SECOND;
1891 if (read_cpuinfo("timebase", line, sizeof(line))) {
1892 return retval;
1895 if (!(ns = strchr(line, ':'))) {
1896 return retval;
1899 ns++;
1901 return atoi(ns);
1904 bool kvmppc_get_host_serial(char **value)
1906 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1907 NULL);
1910 bool kvmppc_get_host_model(char **value)
1912 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1915 /* Try to find a device tree node for a CPU with clock-frequency property */
1916 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1918 struct dirent *dirp;
1919 DIR *dp;
1921 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1922 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1923 return -1;
1926 buf[0] = '\0';
1927 while ((dirp = readdir(dp)) != NULL) {
1928 FILE *f;
1929 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1930 dirp->d_name);
1931 f = fopen(buf, "r");
1932 if (f) {
1933 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1934 fclose(f);
1935 break;
1937 buf[0] = '\0';
1939 closedir(dp);
1940 if (buf[0] == '\0') {
1941 printf("Unknown host!\n");
1942 return -1;
1945 return 0;
/*
 * Read a big-endian integer property from a device tree file.
 *
 * Returns the value for 4-byte and 8-byte properties, (uint64_t)-1 when the
 * file cannot be opened, and 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *fp = fopen(filename, "rb");

    if (fp == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1975 /* Read a CPU node property from the host device tree that's a single
1976 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1977 * (can't find or open the property, or doesn't understand the
1978 * format) */
1979 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1981 char buf[PATH_MAX], *tmp;
1982 uint64_t val;
1984 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1985 return -1;
1988 tmp = g_strdup_printf("%s/%s", buf, propname);
1989 val = kvmppc_read_int_dt(tmp);
1990 g_free(tmp);
1992 return val;
/* Return the "clock-frequency" property of the host CPU device tree node,
 * as reported by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Return the "ibm,vmx" property of the host CPU device tree node,
 * truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
/* Return the "ibm,dfp" property of the host CPU device tree node,
 * truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
2010 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2012 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2013 CPUState *cs = CPU(cpu);
2015 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2016 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2017 return 0;
2020 return 1;
2023 int kvmppc_get_hasidle(CPUPPCState *env)
2025 struct kvm_ppc_pvinfo pvinfo;
2027 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2028 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2029 return 1;
2032 return 0;
2035 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2037 uint32_t *hc = (uint32_t*)buf;
2038 struct kvm_ppc_pvinfo pvinfo;
2040 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2041 memcpy(buf, pvinfo.hcall, buf_len);
2042 return 0;
2046 * Fallback to always fail hypercalls regardless of endianness:
2048 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2049 * li r3, -1
2050 * b .+8 (becomes nop in wrong endian)
2051 * bswap32(li r3, -1)
2054 hc[0] = cpu_to_be32(0x08000048);
2055 hc[1] = cpu_to_be32(0x3860ffff);
2056 hc[2] = cpu_to_be32(0x48000008);
2057 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2059 return 1;
2062 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2064 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2067 void kvmppc_enable_logical_ci_hcalls(void)
2070 * FIXME: it would be nice if we could detect the cases where
2071 * we're using a device which requires the in kernel
2072 * implementation of these hcalls, but the kernel lacks them and
2073 * produce a warning.
2075 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2076 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2079 void kvmppc_enable_set_mode_hcall(void)
2081 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2084 void kvmppc_enable_clear_ref_mod_hcalls(void)
2086 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2087 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2090 void kvmppc_set_papr(PowerPCCPU *cpu)
2092 CPUState *cs = CPU(cpu);
2093 int ret;
2095 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2096 if (ret) {
2097 error_report("This vCPU type or KVM version does not support PAPR");
2098 exit(1);
2101 /* Update the capability flag so we sync the right information
2102 * with kvm */
2103 cap_papr = 1;
2106 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2108 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2111 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2113 CPUState *cs = CPU(cpu);
2114 int ret;
2116 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2117 if (ret && mpic_proxy) {
2118 error_report("This KVM version does not support EPR");
2119 exit(1);
2123 int kvmppc_smt_threads(void)
2125 return cap_ppc_smt ? cap_ppc_smt : 1;
2128 #ifdef TARGET_PPC64
2129 off_t kvmppc_alloc_rma(void **rma)
2131 off_t size;
2132 int fd;
2133 struct kvm_allocate_rma ret;
2135 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2136 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2137 * not necessary on this hardware
2138 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2140 * FIXME: We should allow the user to force contiguous RMA
2141 * allocation in the cap_ppc_rma==1 case.
2143 if (cap_ppc_rma < 2) {
2144 return 0;
2147 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2148 if (fd < 0) {
2149 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2150 strerror(errno));
2151 return -1;
2154 size = MIN(ret.rma_size, 256ul << 20);
2156 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2157 if (*rma == MAP_FAILED) {
2158 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2159 return -1;
2162 return size;
2165 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2167 struct kvm_ppc_smmu_info info;
2168 long rampagesize, best_page_shift;
2169 int i;
2171 if (cap_ppc_rma >= 2) {
2172 return current_size;
2175 /* Find the largest hardware supported page size that's less than
2176 * or equal to the (logical) backing page size of guest RAM */
2177 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2178 rampagesize = getrampagesize();
2179 best_page_shift = 0;
2181 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2182 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2184 if (!sps->page_shift) {
2185 continue;
2188 if ((sps->page_shift > best_page_shift)
2189 && ((1UL << sps->page_shift) <= rampagesize)) {
2190 best_page_shift = sps->page_shift;
2194 return MIN(current_size,
2195 1ULL << (best_page_shift + hash_shift - 7));
2197 #endif
2199 bool kvmppc_spapr_use_multitce(void)
2201 return cap_spapr_multitce;
2204 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2205 bool need_vfio)
2207 struct kvm_create_spapr_tce args = {
2208 .liobn = liobn,
2209 .window_size = window_size,
2211 long len;
2212 int fd;
2213 void *table;
2215 /* Must set fd to -1 so we don't try to munmap when called for
2216 * destroying the table, which the upper layers -will- do
2218 *pfd = -1;
2219 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2220 return NULL;
2223 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2224 if (fd < 0) {
2225 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2226 liobn);
2227 return NULL;
2230 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2231 /* FIXME: round this up to page size */
2233 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2234 if (table == MAP_FAILED) {
2235 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2236 liobn);
2237 close(fd);
2238 return NULL;
2241 *pfd = fd;
2242 return table;
/*
 * Unmap a TCE table of nb_table 64-bit entries and close its descriptor.
 *
 * Returns -1 when fd is negative, otherwise 0; a failing munmap() or
 * close() is only reported on stderr.  close() is not attempted when
 * munmap() fails.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long map_len;
    bool failed;

    if (fd < 0) {
        return -1;
    }

    map_len = nb_table * sizeof(uint64_t);
    failed = (munmap(table, map_len) < 0) || (close(fd) < 0);
    if (failed) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2264 int kvmppc_reset_htab(int shift_hint)
2266 uint32_t shift = shift_hint;
2268 if (!kvm_enabled()) {
2269 /* Full emulation, tell caller to allocate htab itself */
2270 return 0;
2272 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2273 int ret;
2274 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2275 if (ret == -ENOTTY) {
2276 /* At least some versions of PR KVM advertise the
2277 * capability, but don't implement the ioctl(). Oops.
2278 * Return 0 so that we allocate the htab in qemu, as is
2279 * correct for PR. */
2280 return 0;
2281 } else if (ret < 0) {
2282 return ret;
2284 return shift;
2287 /* We have a kernel that predates the htab reset calls. For PR
2288 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2289 * this era, it has allocated a 16MB fixed size hash table already. */
2290 if (kvmppc_is_pr(kvm_state)) {
2291 /* PR - tell caller to allocate htab */
2292 return 0;
2293 } else {
2294 /* HV - assume 16MB kernel allocated htab */
2295 return 24;
2299 static inline uint32_t mfpvr(void)
2301 uint32_t pvr;
2303 asm ("mfpvr %0"
2304 : "=r"(pvr));
2305 return pvr;
/* Set (on == true) or clear (on == false) the bits given by flags in
 * *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2317 static void kvmppc_host_cpu_initfn(Object *obj)
2319 assert(kvm_enabled());
2322 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2324 DeviceClass *dc = DEVICE_CLASS(oc);
2325 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2326 uint32_t vmx = kvmppc_get_vmx();
2327 uint32_t dfp = kvmppc_get_dfp();
2328 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2329 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2331 /* Now fix up the class with information we can query from the host */
2332 pcc->pvr = mfpvr();
2334 if (vmx != -1) {
2335 /* Only override when we know what the host supports */
2336 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2337 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2339 if (dfp != -1) {
2340 /* Only override when we know what the host supports */
2341 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2344 if (dcache_size != -1) {
2345 pcc->l1_dcache_size = dcache_size;
2348 if (icache_size != -1) {
2349 pcc->l1_icache_size = icache_size;
2352 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2353 dc->cannot_destroy_with_object_finalize_yet = true;
2356 bool kvmppc_has_cap_epr(void)
2358 return cap_epr;
2361 bool kvmppc_has_cap_htab_fd(void)
2363 return cap_htab_fd;
2366 bool kvmppc_has_cap_fixup_hcalls(void)
2368 return cap_fixup_hcalls;
2371 bool kvmppc_has_cap_htm(void)
2373 return cap_htm;
2376 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2378 ObjectClass *oc = OBJECT_CLASS(pcc);
2380 while (oc && !object_class_is_abstract(oc)) {
2381 oc = object_class_get_parent(oc);
2383 assert(oc);
2385 return POWERPC_CPU_CLASS(oc);
2388 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2390 uint32_t host_pvr = mfpvr();
2391 PowerPCCPUClass *pvr_pcc;
2393 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2394 if (pvr_pcc == NULL) {
2395 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2398 return pvr_pcc;
2401 static int kvm_ppc_register_host_cpu_type(void)
2403 TypeInfo type_info = {
2404 .name = TYPE_HOST_POWERPC_CPU,
2405 .instance_init = kvmppc_host_cpu_initfn,
2406 .class_init = kvmppc_host_cpu_class_init,
2408 PowerPCCPUClass *pvr_pcc;
2409 DeviceClass *dc;
2411 pvr_pcc = kvm_ppc_get_host_cpu_class();
2412 if (pvr_pcc == NULL) {
2413 return -1;
2415 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2416 type_register(&type_info);
2418 /* Register generic family CPU class for a family */
2419 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2420 dc = DEVICE_CLASS(pvr_pcc);
2421 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2422 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2423 type_register(&type_info);
2425 #if defined(TARGET_PPC64)
2426 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2427 type_info.parent = TYPE_SPAPR_CPU_CORE,
2428 type_info.instance_size = sizeof(sPAPRCPUCore);
2429 type_info.instance_init = NULL;
2430 type_info.class_init = spapr_cpu_core_class_init;
2431 type_info.class_data = (void *) "host";
2432 type_register(&type_info);
2433 g_free((void *)type_info.name);
2435 /* Register generic spapr CPU family class for current host CPU type */
2436 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2437 type_info.class_data = (void *) dc->desc;
2438 type_register(&type_info);
2439 g_free((void *)type_info.name);
2440 #endif
2442 return 0;
2445 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2447 struct kvm_rtas_token_args args = {
2448 .token = token,
2451 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2452 return -ENOENT;
2455 strncpy(args.name, function, sizeof(args.name));
2457 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2460 int kvmppc_get_htab_fd(bool write)
2462 struct kvm_get_htab_fd s = {
2463 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2464 .start_index = 0,
2467 if (!cap_htab_fd) {
2468 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2469 return -1;
2472 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2475 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2477 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2478 uint8_t buf[bufsize];
2479 ssize_t rc;
2481 do {
2482 rc = read(fd, buf, bufsize);
2483 if (rc < 0) {
2484 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2485 strerror(errno));
2486 return rc;
2487 } else if (rc) {
2488 uint8_t *buffer = buf;
2489 ssize_t n = rc;
2490 while (n) {
2491 struct kvm_get_htab_header *head =
2492 (struct kvm_get_htab_header *) buffer;
2493 size_t chunksize = sizeof(*head) +
2494 HASH_PTE_SIZE_64 * head->n_valid;
2496 qemu_put_be32(f, head->index);
2497 qemu_put_be16(f, head->n_valid);
2498 qemu_put_be16(f, head->n_invalid);
2499 qemu_put_buffer(f, (void *)(head + 1),
2500 HASH_PTE_SIZE_64 * head->n_valid);
2502 buffer += chunksize;
2503 n -= chunksize;
2506 } while ((rc != 0)
2507 && ((max_ns < 0)
2508 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2510 return (rc == 0) ? 1 : 0;
2513 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2514 uint16_t n_valid, uint16_t n_invalid)
2516 struct kvm_get_htab_header *buf;
2517 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2518 ssize_t rc;
2520 buf = alloca(chunksize);
2521 buf->index = index;
2522 buf->n_valid = n_valid;
2523 buf->n_invalid = n_invalid;
2525 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2527 rc = write(fd, buf, chunksize);
2528 if (rc < 0) {
2529 fprintf(stderr, "Error writing KVM hash table: %s\n",
2530 strerror(errno));
2531 return rc;
2533 if (rc != chunksize) {
2534 /* We should never get a short write on a single chunk */
2535 fprintf(stderr, "Short write, restoring KVM hash table\n");
2536 return -1;
2538 return 0;
2541 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2543 return true;
2546 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2548 return 1;
/*
 * Architecture hook for SIGBUS; this implementation ignores its arguments
 * and always returns 1.
 */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
2556 void kvm_arch_init_irq_routing(KVMState *s)
2560 struct kvm_get_htab_buf {
2561 struct kvm_get_htab_header header;
2563 * We require one extra byte for read
2565 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2568 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2570 int htab_fd;
2571 struct kvm_get_htab_fd ghf;
2572 struct kvm_get_htab_buf *hpte_buf;
2574 ghf.flags = 0;
2575 ghf.start_index = pte_index;
2576 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2577 if (htab_fd < 0) {
2578 goto error_out;
2581 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2583 * Read the hpte group
2585 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2586 goto out_close;
2589 close(htab_fd);
2590 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2592 out_close:
2593 g_free(hpte_buf);
2594 close(htab_fd);
2595 error_out:
2596 return 0;
2599 void kvmppc_hash64_free_pteg(uint64_t token)
2601 struct kvm_get_htab_buf *htab_buf;
2603 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2604 hpte);
2605 g_free(htab_buf);
2606 return;
2609 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2610 target_ulong pte0, target_ulong pte1)
2612 int htab_fd;
2613 struct kvm_get_htab_fd ghf;
2614 struct kvm_get_htab_buf hpte_buf;
2616 ghf.flags = 0;
2617 ghf.start_index = 0; /* Ignored */
2618 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2619 if (htab_fd < 0) {
2620 goto error_out;
2623 hpte_buf.header.n_valid = 1;
2624 hpte_buf.header.n_invalid = 0;
2625 hpte_buf.header.index = pte_index;
2626 hpte_buf.hpte[0] = pte0;
2627 hpte_buf.hpte[1] = pte1;
2629 * Write the hpte entry.
2630 * CAUTION: write() has the warn_unused_result attribute. Hence we
2631 * need to check the return value, even though we do nothing.
2633 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2634 goto out_close;
2637 out_close:
2638 close(htab_fd);
2639 return;
2641 error_out:
2642 return;
2645 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2646 uint64_t address, uint32_t data, PCIDevice *dev)
2648 return 0;
2651 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2652 int vector, PCIDevice *dev)
2654 return 0;
/*
 * Architecture hook called after a virq is released; this implementation
 * does nothing and always returns 0.
 */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16 = data & 0xffff;

    return low16;
}
2667 int kvmppc_enable_hwrng(void)
2669 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2670 return -1;
2673 return kvmppc_enable_hcall(kvm_state, H_RANDOM);