ppc: Check the availability of transactional memory
[qemu/ar7.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
49 #endif
51 //#define DEBUG_KVM
53 #ifdef DEBUG_KVM
54 #define DPRINTF(fmt, ...) \
55 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
56 #else
57 #define DPRINTF(fmt, ...) \
58 do { } while (0)
59 #endif
61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
64 KVM_CAP_LAST_INFO
67 static int cap_interrupt_unset = false;
68 static int cap_interrupt_level = false;
69 static int cap_segstate;
70 static int cap_booke_sregs;
71 static int cap_ppc_smt;
72 static int cap_ppc_rma;
73 static int cap_spapr_tce;
74 static int cap_spapr_multitce;
75 static int cap_spapr_vfio;
76 static int cap_hior;
77 static int cap_one_reg;
78 static int cap_epr;
79 static int cap_ppc_watchdog;
80 static int cap_papr;
81 static int cap_htab_fd;
82 static int cap_fixup_hcalls;
83 static int cap_htm; /* Hardware transactional memory support */
85 static uint32_t debug_inst_opcode;
87 /* XXX We have a race condition where we actually have a level triggered
88 * interrupt, but the infrastructure can't expose that yet, so the guest
89 * takes but ignores it, goes to sleep and never gets notified that there's
90 * still an interrupt pending.
92 * As a quick workaround, let's just wake up again 20 ms after we injected
 93  * an interrupt. That way we can ensure that we're always reinjecting
94 * interrupts in case the guest swallowed them.
96 static QEMUTimer *idle_timer;
98 static void kvm_kick_cpu(void *opaque)
100 PowerPCCPU *cpu = opaque;
102 qemu_cpu_kick(CPU(cpu));
105 static int kvm_ppc_register_host_cpu_type(void);
107 int kvm_arch_init(MachineState *ms, KVMState *s)
109 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
110 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
111 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
112 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
113 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
114 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
115 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
116 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
117 cap_spapr_vfio = false;
118 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
119 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
120 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
121 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
122 /* Note: we don't set cap_papr here, because this capability is
123 * only activated after this by kvmppc_set_papr() */
124 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
125 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
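/* KVM_CAP_PPC_HTM is queried on the VM fd (kvm_vm_check_extension()) rather
 * than globally; the flag is presumably meant to let the machine code decide
 * whether hardware transactional memory can be advertised to the guest. */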
126 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
128 if (!cap_interrupt_level) {
129 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
130 "VM to stall at times!\n");
133 kvm_ppc_register_host_cpu_type();
135 return 0;
138 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
140 CPUPPCState *cenv = &cpu->env;
141 CPUState *cs = CPU(cpu);
142 struct kvm_sregs sregs;
143 int ret;
145 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
146 /* What we're really trying to say is "if we're on BookE, we use
147 the native PVR for now". This is the only sane way to check
148        it though, so we may mislead users into thinking they can run
149        BookE guests on BookS. Let's hope nobody dares to try :) */
150 return 0;
151 } else {
152 if (!cap_segstate) {
153 fprintf(stderr, "kvm error: missing PVR setting capability\n");
154 return -ENOSYS;
158 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
159 if (ret) {
160 return ret;
163 sregs.pvr = cenv->spr[SPR_PVR];
164 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
167 /* Set up a shared TLB array with KVM */
168 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
170 CPUPPCState *env = &cpu->env;
171 CPUState *cs = CPU(cpu);
172 struct kvm_book3e_206_tlb_params params = {};
173 struct kvm_config_tlb cfg = {};
174 unsigned int entries = 0;
175 int ret, i;
177 if (!kvm_enabled() ||
178 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
179 return 0;
182 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
184 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
185 params.tlb_sizes[i] = booke206_tlb_size(env, i);
186 params.tlb_ways[i] = booke206_tlb_ways(env, i);
187 entries += params.tlb_sizes[i];
190 assert(entries == env->nb_tlb);
191 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
193 env->tlb_dirty = true;
195 cfg.array = (uintptr_t)env->tlb.tlbm;
196 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
197 cfg.params = (uintptr_t)&params;
198 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
200 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
201 if (ret < 0) {
202 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
203 __func__, strerror(-ret));
204 return ret;
207 env->kvm_sw_tlb = true;
208 return 0;
212 #if defined(TARGET_PPC64)
213 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
214 struct kvm_ppc_smmu_info *info)
216 CPUPPCState *env = &cpu->env;
217 CPUState *cs = CPU(cpu);
219 memset(info, 0, sizeof(*info));
221 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
222 * need to "guess" what the supported page sizes are.
224 * For that to work we make a few assumptions:
226 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
227 * KVM which only supports 4K and 16M pages, but supports them
228  *     regardless of the backing store characteristics. We also don't
229 * support 1T segments.
231 * This is safe as if HV KVM ever supports that capability or PR
232  *   KVM grows support for more page/segment sizes, those versions
233 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
234 * will not hit this fallback
236 * - Else we are running HV KVM. This means we only support page
237 * sizes that fit in the backing store. Additionally we only
238  *   advertise 64K pages if the processor is ARCH 2.06 and we assume
239 * P7 encodings for the SLB and hash table. Here too, we assume
240 * support for any newer processor will mean a kernel that
241 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
242 * this fallback.
244 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
245 /* No flags */
246 info->flags = 0;
247 info->slb_size = 64;
249 /* Standard 4k base page size segment */
250 info->sps[0].page_shift = 12;
251 info->sps[0].slb_enc = 0;
252 info->sps[0].enc[0].page_shift = 12;
253 info->sps[0].enc[0].pte_enc = 0;
255 /* Standard 16M large page size segment */
256 info->sps[1].page_shift = 24;
257 info->sps[1].slb_enc = SLB_VSID_L;
258 info->sps[1].enc[0].page_shift = 24;
259 info->sps[1].enc[0].pte_enc = 0;
260 } else {
261 int i = 0;
263 /* HV KVM has backing store size restrictions */
264 info->flags = KVM_PPC_PAGE_SIZES_REAL;
266 if (env->mmu_model & POWERPC_MMU_1TSEG) {
267 info->flags |= KVM_PPC_1T_SEGMENTS;
270 if (env->mmu_model == POWERPC_MMU_2_06 ||
271 env->mmu_model == POWERPC_MMU_2_07) {
272 info->slb_size = 32;
273 } else {
274 info->slb_size = 64;
277 /* Standard 4k base page size segment */
278 info->sps[i].page_shift = 12;
279 info->sps[i].slb_enc = 0;
280 info->sps[i].enc[0].page_shift = 12;
281 info->sps[i].enc[0].pte_enc = 0;
282 i++;
284 /* 64K on MMU 2.06 and later */
285 if (env->mmu_model == POWERPC_MMU_2_06 ||
286 env->mmu_model == POWERPC_MMU_2_07) {
287 info->sps[i].page_shift = 16;
288 info->sps[i].slb_enc = 0x110;
289 info->sps[i].enc[0].page_shift = 16;
290 info->sps[i].enc[0].pte_enc = 1;
291 i++;
294 /* Standard 16M large page size segment */
295 info->sps[i].page_shift = 24;
296 info->sps[i].slb_enc = SLB_VSID_L;
297 info->sps[i].enc[0].page_shift = 24;
298 info->sps[i].enc[0].pte_enc = 0;
302 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
304 CPUState *cs = CPU(cpu);
305 int ret;
307 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
308 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
309 if (ret == 0) {
310 return;
314 kvm_get_fallback_smmu_info(cpu, info);
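/* Return the page size backing mem_path: the filesystem block size when the
 * path lives on hugetlbfs, otherwise the ordinary host page size. */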
317 static long gethugepagesize(const char *mem_path)
319 struct statfs fs;
320 int ret;
322 do {
323 ret = statfs(mem_path, &fs);
324 } while (ret != 0 && errno == EINTR);
326 if (ret != 0) {
327 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
328 strerror(errno));
329 exit(1);
332 #define HUGETLBFS_MAGIC 0x958458f6
334 if (fs.f_type != HUGETLBFS_MAGIC) {
335 /* Explicit mempath, but it's ordinary pages */
336 return getpagesize();
339     /* It's a hugepage; return the huge page size */
340 return fs.f_bsize;
344 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
345 * may or may not name the same files / on the same filesystem now as
346 * when we actually open and map them. Iterate over the file
347 * descriptors instead, and use qemu_fd_getpagesize().
349 static int find_max_supported_pagesize(Object *obj, void *opaque)
351 char *mem_path;
352 long *hpsize_min = opaque;
354 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
355 mem_path = object_property_get_str(obj, "mem-path", NULL);
356 if (mem_path) {
357 long hpsize = gethugepagesize(mem_path);
358 if (hpsize < *hpsize_min) {
359 *hpsize_min = hpsize;
361 } else {
362 *hpsize_min = getpagesize();
366 return 0;
369 static long getrampagesize(void)
371 long hpsize = LONG_MAX;
372 long mainrampagesize;
373 Object *memdev_root;
375 if (mem_path) {
376 mainrampagesize = gethugepagesize(mem_path);
377 } else {
378 mainrampagesize = getpagesize();
381 /* it's possible we have memory-backend objects with
382 * hugepage-backed RAM. these may get mapped into system
383 * address space via -numa parameters or memory hotplug
384 * hooks. we want to take these into account, but we
385 * also want to make sure these supported hugepage
386 * sizes are applicable across the entire range of memory
387 * we may boot from, so we take the min across all
388 * backends, and assume normal pages in cases where a
389 * backend isn't backed by hugepages.
391 memdev_root = object_resolve_path("/objects", NULL);
392 if (memdev_root) {
393 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
395 if (hpsize == LONG_MAX) {
396 /* No additional memory regions found ==> Report main RAM page size */
397 return mainrampagesize;
400 /* If NUMA is disabled or the NUMA nodes are not backed with a
401 * memory-backend, then there is at least one node using "normal" RAM,
402 * so if its page size is smaller we have got to report that size instead.
404 if (hpsize > mainrampagesize &&
405 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
406 static bool warned;
407 if (!warned) {
408 error_report("Huge page support disabled (n/a for main memory).");
409 warned = true;
411 return mainrampagesize;
414 return hpsize;
417 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
419 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
420 return true;
423 return (1ul << shift) <= rampgsize;
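/* Reconcile the CPU's advertised segment/page sizes with what KVM and the
 * RAM backing page size actually support, dropping anything unusable. */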
426 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
428 static struct kvm_ppc_smmu_info smmu_info;
429 static bool has_smmu_info;
430 CPUPPCState *env = &cpu->env;
431 long rampagesize;
432 int iq, ik, jq, jk;
433 bool has_64k_pages = false;
435 /* We only handle page sizes for 64-bit server guests for now */
436 if (!(env->mmu_model & POWERPC_MMU_64)) {
437 return;
440 /* Collect MMU info from kernel if not already */
441 if (!has_smmu_info) {
442 kvm_get_smmu_info(cpu, &smmu_info);
443 has_smmu_info = true;
446 rampagesize = getrampagesize();
448 /* Convert to QEMU form */
449 memset(&env->sps, 0, sizeof(env->sps));
451 /* If we have HV KVM, we need to forbid CI large pages if our
452 * host page size is smaller than 64K.
454 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
455 env->ci_large_pages = getpagesize() >= 0x10000;
459 * XXX This loop should be an entry wide AND of the capabilities that
460 * the selected CPU has with the capabilities that KVM supports.
462 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
463 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
464 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
466 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
467 ksps->page_shift)) {
468 continue;
470 qsps->page_shift = ksps->page_shift;
471 qsps->slb_enc = ksps->slb_enc;
472 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
473 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
474 ksps->enc[jk].page_shift)) {
475 continue;
477 if (ksps->enc[jk].page_shift == 16) {
478 has_64k_pages = true;
480 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
481 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
482 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
483 break;
486 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
487 break;
490 env->slb_nr = smmu_info.slb_size;
491 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
492 env->mmu_model &= ~POWERPC_MMU_1TSEG;
494 if (!has_64k_pages) {
495 env->mmu_model &= ~POWERPC_MMU_64K;
498 #else /* defined (TARGET_PPC64) */
500 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
504 #endif /* !defined (TARGET_PPC64) */
506 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
508 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
511 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
512 * book3s supports only 1 watchpoint, so array size
513 * of 4 is sufficient for now.
515 #define MAX_HW_BKPTS 4
517 static struct HWBreakpoint {
518 target_ulong addr;
519 int type;
520 } hw_debug_points[MAX_HW_BKPTS];
522 static CPUWatchpoint hw_watchpoint;
524 /* By default no breakpoints or watchpoints are supported */
525 static int max_hw_breakpoint;
526 static int max_hw_watchpoint;
527 static int nb_hw_breakpoint;
528 static int nb_hw_watchpoint;
530 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
532 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
533 max_hw_breakpoint = 2;
534 max_hw_watchpoint = 2;
537 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
538 fprintf(stderr, "Error initializing h/w breakpoints\n");
539 return;
543 int kvm_arch_init_vcpu(CPUState *cs)
545 PowerPCCPU *cpu = POWERPC_CPU(cs);
546 CPUPPCState *cenv = &cpu->env;
547 int ret;
549 /* Gather server mmu info from KVM and update the CPU state */
550 kvm_fixup_page_sizes(cpu);
552 /* Synchronize sregs with kvm */
553 ret = kvm_arch_sync_sregs(cpu);
554 if (ret) {
555 if (ret == -EINVAL) {
556 error_report("Register sync failed... If you're using kvm-hv.ko,"
557 " only \"-cpu host\" is possible");
559 return ret;
562 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
564 /* Some targets support access to KVM's guest TLB. */
565 switch (cenv->mmu_model) {
566 case POWERPC_MMU_BOOKE206:
567 ret = kvm_booke206_tlb_init(cpu);
568 break;
569 default:
570 break;
573 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
574 kvmppc_hw_debug_points_init(cenv);
576 return ret;
579 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
581 CPUPPCState *env = &cpu->env;
582 CPUState *cs = CPU(cpu);
583 struct kvm_dirty_tlb dirty_tlb;
584 unsigned char *bitmap;
585 int ret;
587 if (!env->kvm_sw_tlb) {
588 return;
591 bitmap = g_malloc((env->nb_tlb + 7) / 8);
592 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
594 dirty_tlb.bitmap = (uintptr_t)bitmap;
595 dirty_tlb.num_dirty = env->nb_tlb;
597 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
598 if (ret) {
599 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
600 __func__, strerror(-ret));
603 g_free(bitmap);
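/* Read a single SPR through the ONE_REG interface; the register width
 * (32 or 64 bit) is encoded in the ONE_REG id itself. */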
606 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
608 PowerPCCPU *cpu = POWERPC_CPU(cs);
609 CPUPPCState *env = &cpu->env;
610 union {
611 uint32_t u32;
612 uint64_t u64;
613 } val;
614 struct kvm_one_reg reg = {
615 .id = id,
616 .addr = (uintptr_t) &val,
618 int ret;
620 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
621 if (ret != 0) {
622 trace_kvm_failed_spr_get(spr, strerror(errno));
623 } else {
624 switch (id & KVM_REG_SIZE_MASK) {
625 case KVM_REG_SIZE_U32:
626 env->spr[spr] = val.u32;
627 break;
629 case KVM_REG_SIZE_U64:
630 env->spr[spr] = val.u64;
631 break;
633 default:
634 /* Don't handle this size yet */
635 abort();
640 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
642 PowerPCCPU *cpu = POWERPC_CPU(cs);
643 CPUPPCState *env = &cpu->env;
644 union {
645 uint32_t u32;
646 uint64_t u64;
647 } val;
648 struct kvm_one_reg reg = {
649 .id = id,
650 .addr = (uintptr_t) &val,
652 int ret;
654 switch (id & KVM_REG_SIZE_MASK) {
655 case KVM_REG_SIZE_U32:
656 val.u32 = env->spr[spr];
657 break;
659 case KVM_REG_SIZE_U64:
660 val.u64 = env->spr[spr];
661 break;
663 default:
664 /* Don't handle this size yet */
665 abort();
668 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
669 if (ret != 0) {
670 trace_kvm_failed_spr_set(spr, strerror(errno));
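/* Push FP/VSX and Altivec state to KVM. The FP and VSX values for each
 * register are assembled into one vsr[2] pair whose element order depends
 * on host endianness. */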
674 static int kvm_put_fp(CPUState *cs)
676 PowerPCCPU *cpu = POWERPC_CPU(cs);
677 CPUPPCState *env = &cpu->env;
678 struct kvm_one_reg reg;
679 int i;
680 int ret;
682 if (env->insns_flags & PPC_FLOAT) {
683 uint64_t fpscr = env->fpscr;
684 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
686 reg.id = KVM_REG_PPC_FPSCR;
687 reg.addr = (uintptr_t)&fpscr;
688 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
689 if (ret < 0) {
690 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
691 return ret;
694 for (i = 0; i < 32; i++) {
695 uint64_t vsr[2];
697 #ifdef HOST_WORDS_BIGENDIAN
698 vsr[0] = float64_val(env->fpr[i]);
699 vsr[1] = env->vsr[i];
700 #else
701 vsr[0] = env->vsr[i];
702 vsr[1] = float64_val(env->fpr[i]);
703 #endif
704 reg.addr = (uintptr_t) &vsr;
705 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
707 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
708 if (ret < 0) {
709 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
710 i, strerror(errno));
711 return ret;
716 if (env->insns_flags & PPC_ALTIVEC) {
717 reg.id = KVM_REG_PPC_VSCR;
718 reg.addr = (uintptr_t)&env->vscr;
719 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
720 if (ret < 0) {
721 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
722 return ret;
725 for (i = 0; i < 32; i++) {
726 reg.id = KVM_REG_PPC_VR(i);
727 reg.addr = (uintptr_t)&env->avr[i];
728 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
729 if (ret < 0) {
730 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
731 return ret;
736 return 0;
739 static int kvm_get_fp(CPUState *cs)
741 PowerPCCPU *cpu = POWERPC_CPU(cs);
742 CPUPPCState *env = &cpu->env;
743 struct kvm_one_reg reg;
744 int i;
745 int ret;
747 if (env->insns_flags & PPC_FLOAT) {
748 uint64_t fpscr;
749 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
751 reg.id = KVM_REG_PPC_FPSCR;
752 reg.addr = (uintptr_t)&fpscr;
753 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
754 if (ret < 0) {
755 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
756 return ret;
757 } else {
758 env->fpscr = fpscr;
761 for (i = 0; i < 32; i++) {
762 uint64_t vsr[2];
764 reg.addr = (uintptr_t) &vsr;
765 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
767 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
768 if (ret < 0) {
769 DPRINTF("Unable to get %s%d from KVM: %s\n",
770 vsx ? "VSR" : "FPR", i, strerror(errno));
771 return ret;
772 } else {
773 #ifdef HOST_WORDS_BIGENDIAN
774 env->fpr[i] = vsr[0];
775 if (vsx) {
776 env->vsr[i] = vsr[1];
778 #else
779 env->fpr[i] = vsr[1];
780 if (vsx) {
781 env->vsr[i] = vsr[0];
783 #endif
788 if (env->insns_flags & PPC_ALTIVEC) {
789 reg.id = KVM_REG_PPC_VSCR;
790 reg.addr = (uintptr_t)&env->vscr;
791 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
792 if (ret < 0) {
793 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
794 return ret;
797 for (i = 0; i < 32; i++) {
798 reg.id = KVM_REG_PPC_VR(i);
799 reg.addr = (uintptr_t)&env->avr[i];
800 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
801 if (ret < 0) {
802 DPRINTF("Unable to get VR%d from KVM: %s\n",
803 i, strerror(errno));
804 return ret;
809 return 0;
812 #if defined(TARGET_PPC64)
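/* The VPA (Virtual Processor Area), SLB shadow buffer and dispatch trace
 * log are areas of guest memory shared with the hypervisor under PAPR;
 * their guest addresses and sizes are transferred via ONE_REG. */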
813 static int kvm_get_vpa(CPUState *cs)
815 PowerPCCPU *cpu = POWERPC_CPU(cs);
816 CPUPPCState *env = &cpu->env;
817 struct kvm_one_reg reg;
818 int ret;
820 reg.id = KVM_REG_PPC_VPA_ADDR;
821 reg.addr = (uintptr_t)&env->vpa_addr;
822 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
823 if (ret < 0) {
824 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
825 return ret;
828 assert((uintptr_t)&env->slb_shadow_size
829 == ((uintptr_t)&env->slb_shadow_addr + 8));
830 reg.id = KVM_REG_PPC_VPA_SLB;
831 reg.addr = (uintptr_t)&env->slb_shadow_addr;
832 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
833 if (ret < 0) {
834 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
835 strerror(errno));
836 return ret;
839 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
840 reg.id = KVM_REG_PPC_VPA_DTL;
841 reg.addr = (uintptr_t)&env->dtl_addr;
842 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
843 if (ret < 0) {
844 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
845 strerror(errno));
846 return ret;
849 return 0;
852 static int kvm_put_vpa(CPUState *cs)
854 PowerPCCPU *cpu = POWERPC_CPU(cs);
855 CPUPPCState *env = &cpu->env;
856 struct kvm_one_reg reg;
857 int ret;
859 /* SLB shadow or DTL can't be registered unless a master VPA is
860 * registered. That means when restoring state, if a VPA *is*
861 * registered, we need to set that up first. If not, we need to
862 * deregister the others before deregistering the master VPA */
863 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
865 if (env->vpa_addr) {
866 reg.id = KVM_REG_PPC_VPA_ADDR;
867 reg.addr = (uintptr_t)&env->vpa_addr;
868 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
869 if (ret < 0) {
870 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
871 return ret;
875 assert((uintptr_t)&env->slb_shadow_size
876 == ((uintptr_t)&env->slb_shadow_addr + 8));
877 reg.id = KVM_REG_PPC_VPA_SLB;
878 reg.addr = (uintptr_t)&env->slb_shadow_addr;
879 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
880 if (ret < 0) {
881 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
882 return ret;
885 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
886 reg.id = KVM_REG_PPC_VPA_DTL;
887 reg.addr = (uintptr_t)&env->dtl_addr;
888 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
889 if (ret < 0) {
890 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
891 strerror(errno));
892 return ret;
895 if (!env->vpa_addr) {
896 reg.id = KVM_REG_PPC_VPA_ADDR;
897 reg.addr = (uintptr_t)&env->vpa_addr;
898 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
899 if (ret < 0) {
900 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
901 return ret;
905 return 0;
907 #endif /* TARGET_PPC64 */
909 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
911 CPUPPCState *env = &cpu->env;
912 struct kvm_sregs sregs;
913 int i;
915 sregs.pvr = env->spr[SPR_PVR];
917 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
919 /* Sync SLB */
920 #ifdef TARGET_PPC64
921 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
922 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
923 if (env->slb[i].esid & SLB_ESID_V) {
924 sregs.u.s.ppc64.slb[i].slbe |= i;
926 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
928 #endif
930 /* Sync SRs */
931 for (i = 0; i < 16; i++) {
932 sregs.u.s.ppc32.sr[i] = env->sr[i];
935 /* Sync BATs */
936 for (i = 0; i < 8; i++) {
937         /* Beware. We have to swap the upper and lower 32-bit halves here */
938 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
939 | env->DBAT[1][i];
940 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
941 | env->IBAT[1][i];
944 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
947 int kvm_arch_put_registers(CPUState *cs, int level)
949 PowerPCCPU *cpu = POWERPC_CPU(cs);
950 CPUPPCState *env = &cpu->env;
951 struct kvm_regs regs;
952 int ret;
953 int i;
955 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
956 if (ret < 0) {
957 return ret;
960 regs.ctr = env->ctr;
961 regs.lr = env->lr;
962 regs.xer = cpu_read_xer(env);
963 regs.msr = env->msr;
964 regs.pc = env->nip;
966 regs.srr0 = env->spr[SPR_SRR0];
967 regs.srr1 = env->spr[SPR_SRR1];
969 regs.sprg0 = env->spr[SPR_SPRG0];
970 regs.sprg1 = env->spr[SPR_SPRG1];
971 regs.sprg2 = env->spr[SPR_SPRG2];
972 regs.sprg3 = env->spr[SPR_SPRG3];
973 regs.sprg4 = env->spr[SPR_SPRG4];
974 regs.sprg5 = env->spr[SPR_SPRG5];
975 regs.sprg6 = env->spr[SPR_SPRG6];
976 regs.sprg7 = env->spr[SPR_SPRG7];
978 regs.pid = env->spr[SPR_BOOKE_PID];
980 for (i = 0;i < 32; i++)
981 regs.gpr[i] = env->gpr[i];
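/* Pack the eight 4-bit CR fields into the single 32-bit regs.cr,
 * with CR0 in the most significant nibble. */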
983 regs.cr = 0;
984 for (i = 0; i < 8; i++) {
985 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
988 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
989 if (ret < 0)
990 return ret;
992 kvm_put_fp(cs);
994 if (env->tlb_dirty) {
995 kvm_sw_tlb_put(cpu);
996 env->tlb_dirty = false;
999 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1000 ret = kvmppc_put_books_sregs(cpu);
1001 if (ret < 0) {
1002 return ret;
1006 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1007 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1010 if (cap_one_reg) {
1011 int i;
1013         /* We deliberately ignore errors here: for kernels which have
1014          * the ONE_REG calls but don't support the specific
1015 * registers, there's a reasonable chance things will still
1016 * work, at least until we try to migrate. */
1017 for (i = 0; i < 1024; i++) {
1018 uint64_t id = env->spr_cb[i].one_reg_id;
1020 if (id != 0) {
1021 kvm_put_one_spr(cs, id, i);
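/* If a transaction is active or suspended (MSR[TS] != 0), the
 * checkpointed "tm_" register set has to be transferred as well. */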
1025 #ifdef TARGET_PPC64
1026 if (msr_ts) {
1027 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1028 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1030 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1031 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1033 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1034 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1035 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1036 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1037 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1038 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1039 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1040 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1041 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1042 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1045 if (cap_papr) {
1046 if (kvm_put_vpa(cs) < 0) {
1047 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1051 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1052 #endif /* TARGET_PPC64 */
1055 return ret;
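/* BookE exception vectors are IVPR (the common prefix) plus the per-vector
 * IVOR offset; keep QEMU's cached copy of the resulting vector in sync. */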
1058 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1060 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1063 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1065 CPUPPCState *env = &cpu->env;
1066 struct kvm_sregs sregs;
1067 int ret;
1069 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1070 if (ret < 0) {
1071 return ret;
1074 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1075 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1076 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1077 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1078 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1079 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1080 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1081 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1082 env->spr[SPR_DECR] = sregs.u.e.dec;
1083 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1084 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1085 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1088 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1089 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1090 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1091 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1092 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1093 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1096 if (sregs.u.e.features & KVM_SREGS_E_64) {
1097 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1100 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1101 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1104 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1105 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1106 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1107 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1108 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1109 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1110 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1111 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1112 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1113 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1114 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1115 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1116 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1117 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1118 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1119 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1120 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1121 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1122 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1123 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1124 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1125 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1126 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1127 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1128 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1129 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1130 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1131 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1132 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1133 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1134 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1135 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1136 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1138 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1139 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1140 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1141 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1142 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1143 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1144 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1147 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1148 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1149 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1152 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1153 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1154 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1155 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1156 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1160 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1161 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1162 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1163 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1164 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1165 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1166 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1167 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1168 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1169 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1170 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1173 if (sregs.u.e.features & KVM_SREGS_EXP) {
1174 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1177 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1178 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1179 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1182 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1183 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1184 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1185 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1187 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1188 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1189 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1193 return 0;
1196 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1198 CPUPPCState *env = &cpu->env;
1199 struct kvm_sregs sregs;
1200 int ret;
1201 int i;
1203 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1204 if (ret < 0) {
1205 return ret;
1208 if (!env->external_htab) {
1209 ppc_store_sdr1(env, sregs.u.s.sdr1);
1212 /* Sync SLB */
1213 #ifdef TARGET_PPC64
1215 * The packed SLB array we get from KVM_GET_SREGS only contains
1216 * information about valid entries. So we flush our internal copy
1217 * to get rid of stale ones, then put all valid SLB entries back
1218 * in.
1220 memset(env->slb, 0, sizeof(env->slb));
1221 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1222 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1223 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1225 * Only restore valid entries
1227 if (rb & SLB_ESID_V) {
1228 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1231 #endif
1233 /* Sync SRs */
1234 for (i = 0; i < 16; i++) {
1235 env->sr[i] = sregs.u.s.ppc32.sr[i];
1238 /* Sync BATs */
1239 for (i = 0; i < 8; i++) {
1240 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1241 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1242 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1243 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1246 return 0;
1249 int kvm_arch_get_registers(CPUState *cs)
1251 PowerPCCPU *cpu = POWERPC_CPU(cs);
1252 CPUPPCState *env = &cpu->env;
1253 struct kvm_regs regs;
1254 uint32_t cr;
1255 int i, ret;
1257 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1258 if (ret < 0)
1259 return ret;
1261 cr = regs.cr;
1262 for (i = 7; i >= 0; i--) {
1263 env->crf[i] = cr & 15;
1264 cr >>= 4;
1267 env->ctr = regs.ctr;
1268 env->lr = regs.lr;
1269 cpu_write_xer(env, regs.xer);
1270 env->msr = regs.msr;
1271 env->nip = regs.pc;
1273 env->spr[SPR_SRR0] = regs.srr0;
1274 env->spr[SPR_SRR1] = regs.srr1;
1276 env->spr[SPR_SPRG0] = regs.sprg0;
1277 env->spr[SPR_SPRG1] = regs.sprg1;
1278 env->spr[SPR_SPRG2] = regs.sprg2;
1279 env->spr[SPR_SPRG3] = regs.sprg3;
1280 env->spr[SPR_SPRG4] = regs.sprg4;
1281 env->spr[SPR_SPRG5] = regs.sprg5;
1282 env->spr[SPR_SPRG6] = regs.sprg6;
1283 env->spr[SPR_SPRG7] = regs.sprg7;
1285 env->spr[SPR_BOOKE_PID] = regs.pid;
1287 for (i = 0;i < 32; i++)
1288 env->gpr[i] = regs.gpr[i];
1290 kvm_get_fp(cs);
1292 if (cap_booke_sregs) {
1293 ret = kvmppc_get_booke_sregs(cpu);
1294 if (ret < 0) {
1295 return ret;
1299 if (cap_segstate) {
1300 ret = kvmppc_get_books_sregs(cpu);
1301 if (ret < 0) {
1302 return ret;
1306 if (cap_hior) {
1307 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1310 if (cap_one_reg) {
1311 int i;
1313         /* We deliberately ignore errors here: for kernels which have
1314          * the ONE_REG calls but don't support the specific
1315 * registers, there's a reasonable chance things will still
1316 * work, at least until we try to migrate. */
1317 for (i = 0; i < 1024; i++) {
1318 uint64_t id = env->spr_cb[i].one_reg_id;
1320 if (id != 0) {
1321 kvm_get_one_spr(cs, id, i);
1325 #ifdef TARGET_PPC64
1326 if (msr_ts) {
1327 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1328 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1330 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1331 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1333 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1334 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1335 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1336 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1337 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1338 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1339 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1340 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1341 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1342 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1345 if (cap_papr) {
1346 if (kvm_get_vpa(cs) < 0) {
1347 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1351 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1352 #endif
1355 return 0;
1358 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1360 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1362 if (irq != PPC_INTERRUPT_EXT) {
1363 return 0;
1366 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1367 return 0;
1370 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1372 return 0;
1375 #if defined(TARGET_PPCEMB)
1376 #define PPC_INPUT_INT PPC40x_INPUT_INT
1377 #elif defined(TARGET_PPC64)
1378 #define PPC_INPUT_INT PPC970_INPUT_INT
1379 #else
1380 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1381 #endif
1383 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1385 PowerPCCPU *cpu = POWERPC_CPU(cs);
1386 CPUPPCState *env = &cpu->env;
1387 int r;
1388 unsigned irq;
1390 qemu_mutex_lock_iothread();
1392 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1393 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1394 if (!cap_interrupt_level &&
1395 run->ready_for_interrupt_injection &&
1396 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1397 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1399 /* For now KVM disregards the 'irq' argument. However, in the
1400 * future KVM could cache it in-kernel to avoid a heavyweight exit
1401 * when reading the UIC.
1403 irq = KVM_INTERRUPT_SET;
1405 DPRINTF("injected interrupt %d\n", irq);
1406 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1407 if (r < 0) {
1408 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1411 /* Always wake up soon in case the interrupt was level based */
1412 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1413 (NANOSECONDS_PER_SECOND / 50));
1416 /* We don't know if there are more interrupts pending after this. However,
1417 * the guest will return to userspace in the course of handling this one
1418      * anyway, so we will get a chance to deliver the rest. */
1420 qemu_mutex_unlock_iothread();
1423 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1425 return MEMTXATTRS_UNSPECIFIED;
1428 int kvm_arch_process_async_events(CPUState *cs)
1430 return cs->halted;
1433 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1435 CPUState *cs = CPU(cpu);
1436 CPUPPCState *env = &cpu->env;
1438 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1439 cs->halted = 1;
1440 cs->exception_index = EXCP_HLT;
1443 return 0;
1446 /* map dcr access to existing qemu dcr emulation */
1447 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1449 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1450 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1452 return 0;
1455 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1457 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1458 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1460 return 0;
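/* Software breakpoints work by saving the original instruction at bp->pc
 * and patching in the KVM-provided debug instruction (debug_inst_opcode). */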
1463 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1465 /* Mixed endian case is not handled */
1466 uint32_t sc = debug_inst_opcode;
1468 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1469 sizeof(sc), 0) ||
1470 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1471 return -EINVAL;
1474 return 0;
1477 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1479 uint32_t sc;
1481 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1482 sc != debug_inst_opcode ||
1483 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1484 sizeof(sc), 1)) {
1485 return -EINVAL;
1488 return 0;
1491 static int find_hw_breakpoint(target_ulong addr, int type)
1493 int n;
1495 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1496 <= ARRAY_SIZE(hw_debug_points));
1498 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1499 if (hw_debug_points[n].addr == addr &&
1500 hw_debug_points[n].type == type) {
1501 return n;
1505 return -1;
1508 static int find_hw_watchpoint(target_ulong addr, int *flag)
1510 int n;
1512 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1513 if (n >= 0) {
1514 *flag = BP_MEM_ACCESS;
1515 return n;
1518 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1519 if (n >= 0) {
1520 *flag = BP_MEM_WRITE;
1521 return n;
1524 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1525 if (n >= 0) {
1526 *flag = BP_MEM_READ;
1527 return n;
1530 return -1;
1533 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1534 target_ulong len, int type)
1536 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1537 return -ENOBUFS;
1540 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1541 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1543 switch (type) {
1544 case GDB_BREAKPOINT_HW:
1545 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1546 return -ENOBUFS;
1549 if (find_hw_breakpoint(addr, type) >= 0) {
1550 return -EEXIST;
1553 nb_hw_breakpoint++;
1554 break;
1556 case GDB_WATCHPOINT_WRITE:
1557 case GDB_WATCHPOINT_READ:
1558 case GDB_WATCHPOINT_ACCESS:
1559 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1560 return -ENOBUFS;
1563 if (find_hw_breakpoint(addr, type) >= 0) {
1564 return -EEXIST;
1567 nb_hw_watchpoint++;
1568 break;
1570 default:
1571 return -ENOSYS;
1574 return 0;
1577 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1578 target_ulong len, int type)
1580 int n;
1582 n = find_hw_breakpoint(addr, type);
1583 if (n < 0) {
1584 return -ENOENT;
1587 switch (type) {
1588 case GDB_BREAKPOINT_HW:
1589 nb_hw_breakpoint--;
1590 break;
1592 case GDB_WATCHPOINT_WRITE:
1593 case GDB_WATCHPOINT_READ:
1594 case GDB_WATCHPOINT_ACCESS:
1595 nb_hw_watchpoint--;
1596 break;
1598 default:
1599 return -ENOSYS;
1601 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1603 return 0;
1606 void kvm_arch_remove_all_hw_breakpoints(void)
1608 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1611 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1613 int n;
1615 /* Software Breakpoint updates */
1616 if (kvm_sw_breakpoints_active(cs)) {
1617 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1620 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1621 <= ARRAY_SIZE(hw_debug_points));
1622 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1624 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1625 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1626 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1627 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1628 switch (hw_debug_points[n].type) {
1629 case GDB_BREAKPOINT_HW:
1630 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1631 break;
1632 case GDB_WATCHPOINT_WRITE:
1633 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1634 break;
1635 case GDB_WATCHPOINT_READ:
1636 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1637 break;
1638 case GDB_WATCHPOINT_ACCESS:
1639 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1640 KVMPPC_DEBUG_WATCH_READ;
1641 break;
1642 default:
1643 cpu_abort(cs, "Unsupported breakpoint type\n");
1645 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1650 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1652 CPUState *cs = CPU(cpu);
1653 CPUPPCState *env = &cpu->env;
1654 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1655 int handle = 0;
1656 int n;
1657 int flag = 0;
1659 if (cs->singlestep_enabled) {
1660 handle = 1;
1661 } else if (arch_info->status) {
1662 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1663 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1664 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1665 if (n >= 0) {
1666 handle = 1;
1668 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1669 KVMPPC_DEBUG_WATCH_WRITE)) {
1670 n = find_hw_watchpoint(arch_info->address, &flag);
1671 if (n >= 0) {
1672 handle = 1;
1673 cs->watchpoint_hit = &hw_watchpoint;
1674 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1675 hw_watchpoint.flags = flag;
1679 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1680 handle = 1;
1681 } else {
1682         /* QEMU is not able to handle this debug exception, so inject
1683          * a program exception into the guest;
1684          * yes, a program exception, NOT a debug exception!
1685          * When QEMU is using the debug resources then the debug exception must
1686          * always be set. To achieve this we set MSR_DE and also set
1687          * MSRP_DEP so the guest cannot change MSR_DE.
1688          * When emulating debug resources for the guest we want the guest
1689          * to control MSR_DE (enable/disable the debug interrupt as needed).
1690          * Supporting both configurations is NOT possible.
1691          * So the result is that we cannot share debug resources
1692          * between QEMU and the guest on the BookE architecture.
1693          * In the current design QEMU gets priority over the guest:
1694          * if QEMU is using the debug resources then the guest
1695          * cannot use them.
1696          * For software breakpoints QEMU uses a privileged instruction,
1697          * so there is no way we got here because the guest set up a
1698          * debug exception; the only possibility is that the guest executed a
1699          * privileged / illegal instruction, and that's why we are
1700          * injecting a program interrupt.
1703 cpu_synchronize_state(cs);
1704 /* env->nip is PC, so increment this by 4 to use
1705          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1707 env->nip += 4;
1708 cs->exception_index = POWERPC_EXCP_PROGRAM;
1709 env->error_code = POWERPC_EXCP_INVAL;
1710 ppc_cpu_do_interrupt(cs);
1713 return handle;
1716 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1718 PowerPCCPU *cpu = POWERPC_CPU(cs);
1719 CPUPPCState *env = &cpu->env;
1720 int ret;
1722 qemu_mutex_lock_iothread();
1724 switch (run->exit_reason) {
1725 case KVM_EXIT_DCR:
1726 if (run->dcr.is_write) {
1727 DPRINTF("handle dcr write\n");
1728 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1729 } else {
1730 DPRINTF("handle dcr read\n");
1731 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1733 break;
1734 case KVM_EXIT_HLT:
1735 DPRINTF("handle halt\n");
1736 ret = kvmppc_handle_halt(cpu);
1737 break;
1738 #if defined(TARGET_PPC64)
1739 case KVM_EXIT_PAPR_HCALL:
1740 DPRINTF("handle PAPR hypercall\n");
1741 run->papr_hcall.ret = spapr_hypercall(cpu,
1742 run->papr_hcall.nr,
1743 run->papr_hcall.args);
1744 ret = 0;
1745 break;
1746 #endif
1747 case KVM_EXIT_EPR:
1748 DPRINTF("handle epr\n");
1749 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1750 ret = 0;
1751 break;
1752 case KVM_EXIT_WATCHDOG:
1753 DPRINTF("handle watchdog expiry\n");
1754 watchdog_perform_action();
1755 ret = 0;
1756 break;
1758 case KVM_EXIT_DEBUG:
1759 DPRINTF("handle debug exception\n");
1760 if (kvm_handle_debug(cpu, run)) {
1761 ret = EXCP_DEBUG;
1762 break;
1764 /* re-enter, this exception was guest-internal */
1765 ret = 0;
1766 break;
1768 default:
1769 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1770 ret = -1;
1771 break;
1774 qemu_mutex_unlock_iothread();
1775 return ret;
1778 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1780 CPUState *cs = CPU(cpu);
1781 uint32_t bits = tsr_bits;
1782 struct kvm_one_reg reg = {
1783 .id = KVM_REG_PPC_OR_TSR,
1784 .addr = (uintptr_t) &bits,
1787 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1790 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1793 CPUState *cs = CPU(cpu);
1794 uint32_t bits = tsr_bits;
1795 struct kvm_one_reg reg = {
1796 .id = KVM_REG_PPC_CLEAR_TSR,
1797 .addr = (uintptr_t) &bits,
1800 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1803 int kvmppc_set_tcr(PowerPCCPU *cpu)
1805 CPUState *cs = CPU(cpu);
1806 CPUPPCState *env = &cpu->env;
1807 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1809 struct kvm_one_reg reg = {
1810 .id = KVM_REG_PPC_TCR,
1811 .addr = (uintptr_t) &tcr,
1814 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1817 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1819 CPUState *cs = CPU(cpu);
1820 int ret;
1822 if (!kvm_enabled()) {
1823 return -1;
1826 if (!cap_ppc_watchdog) {
1827 printf("warning: KVM does not support watchdog");
1828 return -1;
1831 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1832 if (ret < 0) {
1833 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1834 __func__, strerror(-ret));
1835 return ret;
1838 return ret;
1841 static int read_cpuinfo(const char *field, char *value, int len)
1843 FILE *f;
1844 int ret = -1;
1845 int field_len = strlen(field);
1846 char line[512];
1848 f = fopen("/proc/cpuinfo", "r");
1849 if (!f) {
1850 return -1;
1853 do {
1854 if (!fgets(line, sizeof(line), f)) {
1855 break;
1857 if (!strncmp(line, field, field_len)) {
1858 pstrcpy(value, len, line);
1859 ret = 0;
1860 break;
1862 } while(*line);
1864 fclose(f);
1866 return ret;
1869 uint32_t kvmppc_get_tbfreq(void)
1871 char line[512];
1872 char *ns;
1873 uint32_t retval = NANOSECONDS_PER_SECOND;
1875 if (read_cpuinfo("timebase", line, sizeof(line))) {
1876 return retval;
1879 if (!(ns = strchr(line, ':'))) {
1880 return retval;
1883 ns++;
1885 return atoi(ns);
1888 bool kvmppc_get_host_serial(char **value)
1890 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1891 NULL);
1894 bool kvmppc_get_host_model(char **value)
1896 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1899 /* Try to find a device tree node for a CPU with clock-frequency property */
1900 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1902 struct dirent *dirp;
1903 DIR *dp;
1905 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1906 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1907 return -1;
1910 buf[0] = '\0';
1911 while ((dirp = readdir(dp)) != NULL) {
1912 FILE *f;
1913 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1914 dirp->d_name);
1915 f = fopen(buf, "r");
1916 if (f) {
1917 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1918 fclose(f);
1919 break;
1921 buf[0] = '\0';
1923 closedir(dp);
1924 if (buf[0] == '\0') {
1925 printf("Unknown host!\n");
1926 return -1;
1929 return 0;
1932 static uint64_t kvmppc_read_int_dt(const char *filename)
1934 union {
1935 uint32_t v32;
1936 uint64_t v64;
1937 } u;
1938 FILE *f;
1939 int len;
1941 f = fopen(filename, "rb");
1942 if (!f) {
1943 return -1;
1946 len = fread(&u, 1, sizeof(u), f);
1947 fclose(f);
1948 switch (len) {
1949 case 4:
1950 /* property is a 32-bit quantity */
1951 return be32_to_cpu(u.v32);
1952 case 8:
1953 return be64_to_cpu(u.v64);
1956 return 0;
1959 /* Read a CPU node property from the host device tree that's a single
1960 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1961 * (can't find or open the property, or doesn't understand the
1962 * format) */
1963 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1965 char buf[PATH_MAX], *tmp;
1966 uint64_t val;
1968 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1969 return -1;
1972 tmp = g_strdup_printf("%s/%s", buf, propname);
1973 val = kvmppc_read_int_dt(tmp);
1974 g_free(tmp);
1976 return val;
1979 uint64_t kvmppc_get_clockfreq(void)
1981 return kvmppc_read_int_cpu_dt("clock-frequency");
1984 uint32_t kvmppc_get_vmx(void)
1986 return kvmppc_read_int_cpu_dt("ibm,vmx");
1989 uint32_t kvmppc_get_dfp(void)
1991 return kvmppc_read_int_cpu_dt("ibm,dfp");
1994 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1996 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1997 CPUState *cs = CPU(cpu);
1999 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2000 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2001 return 0;
2004 return 1;
2007 int kvmppc_get_hasidle(CPUPPCState *env)
2009 struct kvm_ppc_pvinfo pvinfo;
2011 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2012 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2013 return 1;
2016 return 0;
2019 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2021 uint32_t *hc = (uint32_t*)buf;
2022 struct kvm_ppc_pvinfo pvinfo;
2024 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2025 memcpy(buf, pvinfo.hcall, buf_len);
2026 return 0;
2030 * Fallback to always fail hypercalls regardless of endianness:
2032 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2033 * li r3, -1
2034 * b .+8 (becomes nop in wrong endian)
2035 * bswap32(li r3, -1)
2038 hc[0] = cpu_to_be32(0x08000048);
2039 hc[1] = cpu_to_be32(0x3860ffff);
2040 hc[2] = cpu_to_be32(0x48000008);
2041 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2043 return 1;
2046 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2048 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2051 void kvmppc_enable_logical_ci_hcalls(void)
2054 * FIXME: it would be nice if we could detect the cases where
2055 * we're using a device which requires the in kernel
2056 * implementation of these hcalls, but the kernel lacks them and
2057 * produce a warning.
2059 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2060 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2063 void kvmppc_enable_set_mode_hcall(void)
2065 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2068 void kvmppc_enable_clear_ref_mod_hcalls(void)
2070 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2071 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2074 void kvmppc_set_papr(PowerPCCPU *cpu)
2076 CPUState *cs = CPU(cpu);
2077 int ret;
2079 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2080 if (ret) {
2081 error_report("This vCPU type or KVM version does not support PAPR");
2082 exit(1);
2085 /* Update the capability flag so we sync the right information
2086 * with kvm */
2087 cap_papr = 1;
2090 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2092 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2095 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2097 CPUState *cs = CPU(cpu);
2098 int ret;
2100 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2101 if (ret && mpic_proxy) {
2102 error_report("This KVM version does not support EPR");
2103 exit(1);
2107 int kvmppc_smt_threads(void)
2109 return cap_ppc_smt ? cap_ppc_smt : 1;
2112 #ifdef TARGET_PPC64
2113 off_t kvmppc_alloc_rma(void **rma)
2115 off_t size;
2116 int fd;
2117 struct kvm_allocate_rma ret;
2119 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2120 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2121 * not necessary on this hardware
2122 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2124 * FIXME: We should allow the user to force contiguous RMA
2125 * allocation in the cap_ppc_rma==1 case.
2127 if (cap_ppc_rma < 2) {
2128 return 0;
2131 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2132 if (fd < 0) {
2133 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2134 strerror(errno));
2135 return -1;
2138 size = MIN(ret.rma_size, 256ul << 20);
2140 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2141 if (*rma == MAP_FAILED) {
2142 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2143 return -1;
2146 return size;
2149 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2151 struct kvm_ppc_smmu_info info;
2152 long rampagesize, best_page_shift;
2153 int i;
2155 if (cap_ppc_rma >= 2) {
2156 return current_size;
2159 /* Find the largest hardware supported page size that's less than
2160 * or equal to the (logical) backing page size of guest RAM */
2161 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2162 rampagesize = getrampagesize();
2163 best_page_shift = 0;
2165 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2166 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2168 if (!sps->page_shift) {
2169 continue;
2172 if ((sps->page_shift > best_page_shift)
2173 && ((1UL << sps->page_shift) <= rampagesize)) {
2174 best_page_shift = sps->page_shift;
2178 return MIN(current_size,
2179 1ULL << (best_page_shift + hash_shift - 7));
2181 #endif
2183 bool kvmppc_spapr_use_multitce(void)
2185 return cap_spapr_multitce;
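/* Create an in-kernel TCE (IOMMU) table for the given LIOBN via
 * KVM_CREATE_SPAPR_TCE and mmap it into QEMU.  On success the table fd
 * is returned through *pfd along with the mapping; otherwise *pfd is set
 * to -1 and NULL is returned, leaving the caller to provide the table by
 * other means.
 */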
2188 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2189 bool need_vfio)
2191 struct kvm_create_spapr_tce args = {
2192 .liobn = liobn,
2193 .window_size = window_size,
2195 long len;
2196 int fd;
2197 void *table;
2199 /* Must set fd to -1 so we don't try to munmap when called for
2200 * destroying the table, which the upper layers -will- do
2202 *pfd = -1;
2203 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2204 return NULL;
2207 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2208 if (fd < 0) {
2209 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2210 liobn);
2211 return NULL;
2214 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2215 /* FIXME: round this up to page size */
2217 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2218 if (table == MAP_FAILED) {
2219 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2220 liobn);
2221 close(fd);
2222 return NULL;
2225 *pfd = fd;
2226 return table;
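/* Undo kvmppc_create_spapr_tce(): unmap the table and close its fd.
 * Returns -1 if fd is negative (no kernel TCE table was in use).
 */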
2229 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2231 long len;
2233 if (fd < 0) {
2234 return -1;
2237 len = nb_table * sizeof(uint64_t);
2238 if ((munmap(table, len) < 0) ||
2239 (close(fd) < 0)) {
2240 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2241 strerror(errno));
2242 /* Leak the table */
2245 return 0;
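/* (Re)allocate the guest hash page table.  Returns the shift (log2 size)
 * of a kernel-managed HPT, 0 when QEMU itself must allocate the HPT
 * (full emulation or PR KVM), or a negative error code.
 */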
2248 int kvmppc_reset_htab(int shift_hint)
2250 uint32_t shift = shift_hint;
2252 if (!kvm_enabled()) {
2253 /* Full emulation, tell caller to allocate htab itself */
2254 return 0;
2256 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2257 int ret;
2258 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2259 if (ret == -ENOTTY) {
2260 /* At least some versions of PR KVM advertise the
2261 * capability, but don't implement the ioctl(). Oops.
2262 * Return 0 so that we allocate the htab in qemu, as is
2263 * correct for PR. */
2264 return 0;
2265 } else if (ret < 0) {
2266 return ret;
2268 return shift;
2271     /* We have a kernel that predates the htab reset calls.  For PR
2272      * KVM we need to allocate the htab ourselves; an HV KVM of this
2273      * era will already have allocated a fixed 16 MiB hash table.
2274      * Kernels of this era advertise the GET_PVINFO capability
2275      * only on PR, so we use this hack to determine the right
2276      * answer. */
2277 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2278 /* PR - tell caller to allocate htab */
2279 return 0;
2280 } else {
2281 /* HV - assume 16MB kernel allocated htab */
2282 return 24;
2286 static inline uint32_t mfpvr(void)
2288 uint32_t pvr;
2290 asm ("mfpvr %0"
2291 : "=r"(pvr));
2292 return pvr;
2295 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2297 if (on) {
2298 *word |= flags;
2299 } else {
2300 *word &= ~flags;
2304 static void kvmppc_host_cpu_initfn(Object *obj)
2306 assert(kvm_enabled());
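/* Fill in the host CPU class with details queried from the host: the
 * PVR, AltiVec/VSX and DFP availability, and the L1 cache sizes read
 * from the device tree.
 */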
2309 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2311 DeviceClass *dc = DEVICE_CLASS(oc);
2312 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2313 uint32_t vmx = kvmppc_get_vmx();
2314 uint32_t dfp = kvmppc_get_dfp();
2315 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2316 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2318 /* Now fix up the class with information we can query from the host */
2319 pcc->pvr = mfpvr();
2321 if (vmx != -1) {
2322 /* Only override when we know what the host supports */
2323 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2324 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2326 if (dfp != -1) {
2327 /* Only override when we know what the host supports */
2328 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2331 if (dcache_size != -1) {
2332 pcc->l1_dcache_size = dcache_size;
2335 if (icache_size != -1) {
2336 pcc->l1_icache_size = icache_size;
2339 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2340 dc->cannot_destroy_with_object_finalize_yet = true;
2343 bool kvmppc_has_cap_epr(void)
2345 return cap_epr;
2348 bool kvmppc_has_cap_htab_fd(void)
2350 return cap_htab_fd;
2353 bool kvmppc_has_cap_fixup_hcalls(void)
2355 return cap_fixup_hcalls;
2358 bool kvmppc_has_cap_htm(void)
2360 return cap_htm;
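/* Walk up the QOM class hierarchy until we reach the first abstract
 * class, which is the generic family class for this CPU model.
 */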
2363 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2365 ObjectClass *oc = OBJECT_CLASS(pcc);
2367 while (oc && !object_class_is_abstract(oc)) {
2368 oc = object_class_get_parent(oc);
2370 assert(oc);
2372 return POWERPC_CPU_CLASS(oc);
2375 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2377 uint32_t host_pvr = mfpvr();
2378 PowerPCCPUClass *pvr_pcc;
2380 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2381 if (pvr_pcc == NULL) {
2382 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2385 return pvr_pcc;
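/* Register the "host" CPU type, modelled on the PVR of the CPU we are
 * running on, plus a generic alias for its CPU family.  On ppc64 the
 * corresponding spapr CPU core types are registered as well.
 */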
2388 static int kvm_ppc_register_host_cpu_type(void)
2390 TypeInfo type_info = {
2391 .name = TYPE_HOST_POWERPC_CPU,
2392 .instance_init = kvmppc_host_cpu_initfn,
2393 .class_init = kvmppc_host_cpu_class_init,
2395 PowerPCCPUClass *pvr_pcc;
2396 DeviceClass *dc;
2398 pvr_pcc = kvm_ppc_get_host_cpu_class();
2399 if (pvr_pcc == NULL) {
2400 return -1;
2402 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2403 type_register(&type_info);
2405     /* Register a generic CPU class for the family this host CPU belongs to */
2406 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2407 dc = DEVICE_CLASS(pvr_pcc);
2408 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2409 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2410 type_register(&type_info);
2412 #if defined(TARGET_PPC64)
2413 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2414     type_info.parent = TYPE_SPAPR_CPU_CORE;
2415 type_info.instance_size = sizeof(sPAPRCPUCore);
2416 type_info.instance_init = NULL;
2417 type_info.class_init = spapr_cpu_core_class_init;
2418 type_info.class_data = (void *) "host";
2419 type_register(&type_info);
2420 g_free((void *)type_info.name);
2422 /* Register generic spapr CPU family class for current host CPU type */
2423 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2424 type_info.class_data = (void *) dc->desc;
2425 type_register(&type_info);
2426 g_free((void *)type_info.name);
2427 #endif
2429 return 0;
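/* Bind an RTAS token to the named RTAS call so that KVM handles the call
 * in the kernel (KVM_PPC_RTAS_DEFINE_TOKEN).  Returns -ENOENT if the
 * kernel lacks the KVM_CAP_PPC_RTAS capability.
 */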
2432 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2434 struct kvm_rtas_token_args args = {
2435 .token = token,
2438 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2439 return -ENOENT;
2442 strncpy(args.name, function, sizeof(args.name));
2444 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
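/* Obtain a file descriptor for streaming the guest hash page table,
 * either for reading or (when @write is true) for writing, via
 * KVM_PPC_GET_HTAB_FD.  Returns -1 if the capability is missing.
 */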
2447 int kvmppc_get_htab_fd(bool write)
2449 struct kvm_get_htab_fd s = {
2450 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2451 .start_index = 0,
2454 if (!cap_htab_fd) {
2455 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2456 return -1;
2459 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
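/* Stream the hash page table from the kernel into the migration stream.
 * Each chunk read from the htab fd is a kvm_get_htab_header (index,
 * n_valid, n_invalid) followed by n_valid HPTEs; we forward those fields
 * and the valid HPTEs to @f.  Returns 1 once the whole table has been
 * read (read() returned 0), or 0 if we stopped because max_ns expired.
 */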
2462 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2464 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2465 uint8_t buf[bufsize];
2466 ssize_t rc;
2468 do {
2469 rc = read(fd, buf, bufsize);
2470 if (rc < 0) {
2471 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2472 strerror(errno));
2473 return rc;
2474 } else if (rc) {
2475 uint8_t *buffer = buf;
2476 ssize_t n = rc;
2477 while (n) {
2478 struct kvm_get_htab_header *head =
2479 (struct kvm_get_htab_header *) buffer;
2480 size_t chunksize = sizeof(*head) +
2481 HASH_PTE_SIZE_64 * head->n_valid;
2483 qemu_put_be32(f, head->index);
2484 qemu_put_be16(f, head->n_valid);
2485 qemu_put_be16(f, head->n_invalid);
2486 qemu_put_buffer(f, (void *)(head + 1),
2487 HASH_PTE_SIZE_64 * head->n_valid);
2489 buffer += chunksize;
2490 n -= chunksize;
2493 } while ((rc != 0)
2494 && ((max_ns < 0)
2495 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2497 return (rc == 0) ? 1 : 0;
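/* Rebuild one header + HPTE chunk from the migration stream and write it
 * to the htab fd so the kernel installs the entries.  A short write is
 * treated as an error, since a single chunk should never be split.
 */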
2500 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2501 uint16_t n_valid, uint16_t n_invalid)
2503 struct kvm_get_htab_header *buf;
2504 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2505 ssize_t rc;
2507 buf = alloca(chunksize);
2508 buf->index = index;
2509 buf->n_valid = n_valid;
2510 buf->n_invalid = n_invalid;
2512 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2514 rc = write(fd, buf, chunksize);
2515 if (rc < 0) {
2516 fprintf(stderr, "Error writing KVM hash table: %s\n",
2517 strerror(errno));
2518 return rc;
2520 if (rc != chunksize) {
2521 /* We should never get a short write on a single chunk */
2522 fprintf(stderr, "Short write, restoring KVM hash table\n");
2523 return -1;
2525 return 0;
2528 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2530 return true;
2533 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2535 return 1;
2538 int kvm_arch_on_sigbus(int code, void *addr)
2540 return 1;
2543 void kvm_arch_init_irq_routing(KVMState *s)
2547 struct kvm_get_htab_buf {
2548 struct kvm_get_htab_header header;
2550      * We require one extra target_ulong of space for the read
2552 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
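/* Read one PTE group starting at @pte_index through the htab fd.  The
 * returned token is really a pointer into a freshly allocated
 * kvm_get_htab_buf and must be released with kvmppc_hash64_free_pteg();
 * 0 is returned on failure.
 */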
2555 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2557 int htab_fd;
2558 struct kvm_get_htab_fd ghf;
2559 struct kvm_get_htab_buf *hpte_buf;
2561 ghf.flags = 0;
2562 ghf.start_index = pte_index;
2563 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2564 if (htab_fd < 0) {
2565 goto error_out;
2568 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2570 * Read the hpte group
2572 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2573 goto out_close;
2576 close(htab_fd);
2577 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2579 out_close:
2580 g_free(hpte_buf);
2581 close(htab_fd);
2582 error_out:
2583 return 0;
2586 void kvmppc_hash64_free_pteg(uint64_t token)
2588 struct kvm_get_htab_buf *htab_buf;
2590 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2591 hpte);
2592 g_free(htab_buf);
2593 return;
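/* Write a single hash PTE (pte0/pte1 at @pte_index) into the guest hash
 * table through the htab fd, using a one-entry chunk with n_valid = 1
 * and n_invalid = 0.  Errors are silently ignored here.
 */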
2596 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2597 target_ulong pte0, target_ulong pte1)
2599 int htab_fd;
2600 struct kvm_get_htab_fd ghf;
2601 struct kvm_get_htab_buf hpte_buf;
2603 ghf.flags = 0;
2604 ghf.start_index = 0; /* Ignored */
2605 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2606 if (htab_fd < 0) {
2607 goto error_out;
2610 hpte_buf.header.n_valid = 1;
2611 hpte_buf.header.n_invalid = 0;
2612 hpte_buf.header.index = pte_index;
2613 hpte_buf.hpte[0] = pte0;
2614 hpte_buf.hpte[1] = pte1;
2616      * Write the hpte entry.
2617      * CAUTION: write() has the warn_unused_result attribute.  Hence we
2618      * must check the return value, even though we can do little about a failure here.
2620 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2621 goto out_close;
2624 out_close:
2625 close(htab_fd);
2626 return;
2628 error_out:
2629 return;
2632 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2633 uint64_t address, uint32_t data, PCIDevice *dev)
2635 return 0;
2638 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2639 int vector, PCIDevice *dev)
2641 return 0;
2644 int kvm_arch_release_virq_post(int virq)
2646 return 0;
2649 int kvm_arch_msi_data_to_gsi(uint32_t data)
2651 return data & 0xffff;
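/* Enable in-kernel handling of the H_RANDOM hypercall when the host
 * kernel advertises KVM_CAP_PPC_HWRNG (i.e. it has a hardware RNG to
 * back it).  Returns -1 when the capability is unavailable.
 */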
2654 int kvmppc_enable_hwrng(void)
2656 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2657 return -1;
2660 return kvmppc_enable_hcall(kvm_state, H_RANDOM);