ppc: Disable huge page support if it is not available for main RAM
[qemu/kevin.git] / target-ppc / kvm.c
blob 884d564e0f17edb027ef6d46ecac9e8f97a549bf
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
41 #include "trace.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
45 #include "qemu/cutils.h"
46 #if defined(TARGET_PPC64)
47 #include "hw/ppc/spapr_cpu_core.h"
48 #endif
50 //#define DEBUG_KVM
52 #ifdef DEBUG_KVM
53 #define DPRINTF(fmt, ...) \
54 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
55 #else
56 #define DPRINTF(fmt, ...) \
57 do { } while (0)
58 #endif
60 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
62 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
63 KVM_CAP_LAST_INFO
66 static int cap_interrupt_unset = false;
67 static int cap_interrupt_level = false;
68 static int cap_segstate;
69 static int cap_booke_sregs;
70 static int cap_ppc_smt;
71 static int cap_ppc_rma;
72 static int cap_spapr_tce;
73 static int cap_spapr_multitce;
74 static int cap_spapr_vfio;
75 static int cap_hior;
76 static int cap_one_reg;
77 static int cap_epr;
78 static int cap_ppc_watchdog;
79 static int cap_papr;
80 static int cap_htab_fd;
81 static int cap_fixup_hcalls;
83 static uint32_t debug_inst_opcode;
85 /* XXX We have a race condition where we actually have a level triggered
86 * interrupt, but the infrastructure can't expose that yet, so the guest
87 * takes but ignores it, goes to sleep and never gets notified that there's
88 * still an interrupt pending.
90 * As a quick workaround, let's just wake up again 20 ms after we injected
91 * an interrupt. That way we can ensure that we're always reinjecting
92 * interrupts in case the guest swallowed them.
94 static QEMUTimer *idle_timer;
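/* Timer callback: kick the vCPU out of KVM so it re-checks for pending interrupts. */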
96 static void kvm_kick_cpu(void *opaque)
98 PowerPCCPU *cpu = opaque;
100 qemu_cpu_kick(CPU(cpu));
103 static int kvm_ppc_register_host_cpu_type(void);
105 int kvm_arch_init(MachineState *ms, KVMState *s)
107 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
108 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
109 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
110 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
111 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
112 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
113 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
114 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
115 cap_spapr_vfio = false;
116 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
117 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
118 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
119 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
120 /* Note: we don't set cap_papr here, because this capability is
121 * only activated later, by kvmppc_set_papr() */
122 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
123 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
125 if (!cap_interrupt_level) {
126 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
127 "VM to stall at times!\n");
130 kvm_ppc_register_host_cpu_type();
132 return 0;
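/* Tell KVM which PVR the guest uses (Book3S only; BookE keeps the host PVR). */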
135 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
137 CPUPPCState *cenv = &cpu->env;
138 CPUState *cs = CPU(cpu);
139 struct kvm_sregs sregs;
140 int ret;
142 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
143 /* What we're really trying to say is "if we're on BookE, we use
144 the native PVR for now". This is the only sane way to check
145 it though, so we potentially mislead users into thinking they can run
146 BookE guests on BookS. Let's hope nobody dares enough :) */
147 return 0;
148 } else {
149 if (!cap_segstate) {
150 fprintf(stderr, "kvm error: missing PVR setting capability\n");
151 return -ENOSYS;
155 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
156 if (ret) {
157 return ret;
160 sregs.pvr = cenv->spr[SPR_PVR];
161 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
164 /* Set up a shared TLB array with KVM */
165 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
167 CPUPPCState *env = &cpu->env;
168 CPUState *cs = CPU(cpu);
169 struct kvm_book3e_206_tlb_params params = {};
170 struct kvm_config_tlb cfg = {};
171 unsigned int entries = 0;
172 int ret, i;
174 if (!kvm_enabled() ||
175 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
176 return 0;
179 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
181 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
182 params.tlb_sizes[i] = booke206_tlb_size(env, i);
183 params.tlb_ways[i] = booke206_tlb_ways(env, i);
184 entries += params.tlb_sizes[i];
187 assert(entries == env->nb_tlb);
188 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
190 env->tlb_dirty = true;
192 cfg.array = (uintptr_t)env->tlb.tlbm;
193 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
194 cfg.params = (uintptr_t)&params;
195 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
197 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
198 if (ret < 0) {
199 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
200 __func__, strerror(-ret));
201 return ret;
204 env->kvm_sw_tlb = true;
205 return 0;
209 #if defined(TARGET_PPC64)
210 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
211 struct kvm_ppc_smmu_info *info)
213 CPUPPCState *env = &cpu->env;
214 CPUState *cs = CPU(cpu);
216 memset(info, 0, sizeof(*info));
218 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
219 * need to "guess" what the supported page sizes are.
221 * For that to work we make a few assumptions:
223 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
224 * KVM which only supports 4K and 16M pages, but supports them
225 * regardless of the backing store characteristics. We also don't
226 * support 1T segments.
228 * This is safe as if HV KVM ever supports that capability or PR
229 * KVM grows support for more page/segment sizes, those versions
230 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
231 * will not hit this fallback
233 * - Else we are running HV KVM. This means we only support page
234 * sizes that fit in the backing store. Additionally we only
235 * advertise 64K pages if the processor is ARCH 2.06 and we assume
236 * P7 encodings for the SLB and hash table. Here too, we assume
237 * support for any newer processor will mean a kernel that
238 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
239 * this fallback.
241 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
242 /* No flags */
243 info->flags = 0;
244 info->slb_size = 64;
246 /* Standard 4k base page size segment */
247 info->sps[0].page_shift = 12;
248 info->sps[0].slb_enc = 0;
249 info->sps[0].enc[0].page_shift = 12;
250 info->sps[0].enc[0].pte_enc = 0;
252 /* Standard 16M large page size segment */
253 info->sps[1].page_shift = 24;
254 info->sps[1].slb_enc = SLB_VSID_L;
255 info->sps[1].enc[0].page_shift = 24;
256 info->sps[1].enc[0].pte_enc = 0;
257 } else {
258 int i = 0;
260 /* HV KVM has backing store size restrictions */
261 info->flags = KVM_PPC_PAGE_SIZES_REAL;
263 if (env->mmu_model & POWERPC_MMU_1TSEG) {
264 info->flags |= KVM_PPC_1T_SEGMENTS;
267 if (env->mmu_model == POWERPC_MMU_2_06 ||
268 env->mmu_model == POWERPC_MMU_2_07) {
269 info->slb_size = 32;
270 } else {
271 info->slb_size = 64;
274 /* Standard 4k base page size segment */
275 info->sps[i].page_shift = 12;
276 info->sps[i].slb_enc = 0;
277 info->sps[i].enc[0].page_shift = 12;
278 info->sps[i].enc[0].pte_enc = 0;
279 i++;
281 /* 64K on MMU 2.06 and later */
282 if (env->mmu_model == POWERPC_MMU_2_06 ||
283 env->mmu_model == POWERPC_MMU_2_07) {
284 info->sps[i].page_shift = 16;
285 info->sps[i].slb_enc = 0x110;
286 info->sps[i].enc[0].page_shift = 16;
287 info->sps[i].enc[0].pte_enc = 1;
288 i++;
291 /* Standard 16M large page size segment */
292 info->sps[i].page_shift = 24;
293 info->sps[i].slb_enc = SLB_VSID_L;
294 info->sps[i].enc[0].page_shift = 24;
295 info->sps[i].enc[0].pte_enc = 0;
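/* Query the MMU characteristics from KVM, falling back to the guesses
 * above on kernels without KVM_PPC_GET_SMMU_INFO. */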
299 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
301 CPUState *cs = CPU(cpu);
302 int ret;
304 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
305 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
306 if (ret == 0) {
307 return;
311 kvm_get_fallback_smmu_info(cpu, info);
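/* Page size provided by the filesystem behind mem_path: the hugetlbfs
 * block size for a hugetlbfs mount, otherwise the normal host page size. */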
314 static long gethugepagesize(const char *mem_path)
316 struct statfs fs;
317 int ret;
319 do {
320 ret = statfs(mem_path, &fs);
321 } while (ret != 0 && errno == EINTR);
323 if (ret != 0) {
324 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
325 strerror(errno));
326 exit(1);
329 #define HUGETLBFS_MAGIC 0x958458f6
331 if (fs.f_type != HUGETLBFS_MAGIC) {
332 /* Explicit mempath, but it's ordinary pages */
333 return getpagesize();
336 /* It's hugetlbfs, so return the huge page size */
337 return fs.f_bsize;
341 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
342 * may or may not name the same files / on the same filesystem now as
343 * when we actually open and map them. Iterate over the file
344 * descriptors instead, and use qemu_fd_getpagesize().
346 static int find_max_supported_pagesize(Object *obj, void *opaque)
348 char *mem_path;
349 long *hpsize_min = opaque;
351 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
352 mem_path = object_property_get_str(obj, "mem-path", NULL);
353 if (mem_path) {
354 long hpsize = gethugepagesize(mem_path);
355 if (hpsize < *hpsize_min) {
356 *hpsize_min = hpsize;
358 } else {
359 *hpsize_min = getpagesize();
363 return 0;
366 static long getrampagesize(void)
368 long hpsize = LONG_MAX;
369 Object *memdev_root;
371 if (mem_path) {
372 return gethugepagesize(mem_path);
375 /* it's possible we have memory-backend objects with
376 * hugepage-backed RAM. these may get mapped into system
377 * address space via -numa parameters or memory hotplug
378 * hooks. we want to take these into account, but we
379 * also want to make sure these supported hugepage
380 * sizes are applicable across the entire range of memory
381 * we may boot from, so we take the min across all
382 * backends, and assume normal pages in cases where a
383 * backend isn't backed by hugepages.
385 memdev_root = object_resolve_path("/objects", NULL);
386 if (!memdev_root) {
387 return getpagesize();
390 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
392 if (hpsize == LONG_MAX) {
393 return getpagesize();
396 if (nb_numa_nodes == 0 && hpsize > getpagesize()) {
397 /* No NUMA nodes and normal RAM without -mem-path ==> no huge pages! */
398 static bool warned;
399 if (!warned) {
400 error_report("Huge page support disabled (n/a for main memory).");
401 warned = true;
403 return getpagesize();
406 return hpsize;
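/* Under HV KVM (KVM_PPC_PAGE_SIZES_REAL) a guest page size is only usable
 * if it is no larger than the page size backing guest RAM. */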
409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
411 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
412 return true;
415 return (1ul << shift) <= rampgsize;
418 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
420 static struct kvm_ppc_smmu_info smmu_info;
421 static bool has_smmu_info;
422 CPUPPCState *env = &cpu->env;
423 long rampagesize;
424 int iq, ik, jq, jk;
426 /* We only handle page sizes for 64-bit server guests for now */
427 if (!(env->mmu_model & POWERPC_MMU_64)) {
428 return;
431 /* Collect MMU info from kernel if not already */
432 if (!has_smmu_info) {
433 kvm_get_smmu_info(cpu, &smmu_info);
434 has_smmu_info = true;
437 rampagesize = getrampagesize();
439 /* Convert to QEMU form */
440 memset(&env->sps, 0, sizeof(env->sps));
442 /* If we have HV KVM, we need to forbid CI large pages if our
443 * host page size is smaller than 64K.
445 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
446 env->ci_large_pages = getpagesize() >= 0x10000;
450 * XXX This loop should be an entry wide AND of the capabilities that
451 * the selected CPU has with the capabilities that KVM supports.
453 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
454 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
455 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
457 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
458 ksps->page_shift)) {
459 continue;
461 qsps->page_shift = ksps->page_shift;
462 qsps->slb_enc = ksps->slb_enc;
463 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
464 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
465 ksps->enc[jk].page_shift)) {
466 continue;
468 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
469 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
470 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
471 break;
474 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
475 break;
478 env->slb_nr = smmu_info.slb_size;
479 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
480 env->mmu_model &= ~POWERPC_MMU_1TSEG;
483 #else /* defined (TARGET_PPC64) */
485 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
489 #endif /* !defined (TARGET_PPC64) */
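/* KVM identifies vCPUs by their device tree id, not by QEMU's cpu_index. */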
491 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
493 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
496 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
497 * book3s supports only 1 watchpoint, so an array size
498 * of 4 is sufficient for now.
500 #define MAX_HW_BKPTS 4
502 static struct HWBreakpoint {
503 target_ulong addr;
504 int type;
505 } hw_debug_points[MAX_HW_BKPTS];
507 static CPUWatchpoint hw_watchpoint;
509 /* By default no breakpoints or watchpoints are supported */
510 static int max_hw_breakpoint;
511 static int max_hw_watchpoint;
512 static int nb_hw_breakpoint;
513 static int nb_hw_watchpoint;
515 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
517 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
518 max_hw_breakpoint = 2;
519 max_hw_watchpoint = 2;
522 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
523 fprintf(stderr, "Error initializing h/w breakpoints\n");
524 return;
528 int kvm_arch_init_vcpu(CPUState *cs)
530 PowerPCCPU *cpu = POWERPC_CPU(cs);
531 CPUPPCState *cenv = &cpu->env;
532 int ret;
534 /* Gather server mmu info from KVM and update the CPU state */
535 kvm_fixup_page_sizes(cpu);
537 /* Synchronize sregs with kvm */
538 ret = kvm_arch_sync_sregs(cpu);
539 if (ret) {
540 if (ret == -EINVAL) {
541 error_report("Register sync failed... If you're using kvm-hv.ko,"
542 " only \"-cpu host\" is possible");
544 return ret;
547 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
549 /* Some targets support access to KVM's guest TLB. */
550 switch (cenv->mmu_model) {
551 case POWERPC_MMU_BOOKE206:
552 ret = kvm_booke206_tlb_init(cpu);
553 break;
554 default:
555 break;
558 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
559 kvmppc_hw_debug_points_init(cenv);
561 return ret;
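/* Push QEMU's copy of the shared software TLB back to KVM by marking
 * every entry dirty. */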
564 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
566 CPUPPCState *env = &cpu->env;
567 CPUState *cs = CPU(cpu);
568 struct kvm_dirty_tlb dirty_tlb;
569 unsigned char *bitmap;
570 int ret;
572 if (!env->kvm_sw_tlb) {
573 return;
576 bitmap = g_malloc((env->nb_tlb + 7) / 8);
577 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
579 dirty_tlb.bitmap = (uintptr_t)bitmap;
580 dirty_tlb.num_dirty = env->nb_tlb;
582 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
583 if (ret) {
584 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
585 __func__, strerror(-ret));
588 g_free(bitmap);
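/* Read or write a single SPR through the ONE_REG interface, using the
 * access size encoded in the register id. */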
591 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
593 PowerPCCPU *cpu = POWERPC_CPU(cs);
594 CPUPPCState *env = &cpu->env;
595 union {
596 uint32_t u32;
597 uint64_t u64;
598 } val;
599 struct kvm_one_reg reg = {
600 .id = id,
601 .addr = (uintptr_t) &val,
603 int ret;
605 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
606 if (ret != 0) {
607 trace_kvm_failed_spr_get(spr, strerror(errno));
608 } else {
609 switch (id & KVM_REG_SIZE_MASK) {
610 case KVM_REG_SIZE_U32:
611 env->spr[spr] = val.u32;
612 break;
614 case KVM_REG_SIZE_U64:
615 env->spr[spr] = val.u64;
616 break;
618 default:
619 /* Don't handle this size yet */
620 abort();
625 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
627 PowerPCCPU *cpu = POWERPC_CPU(cs);
628 CPUPPCState *env = &cpu->env;
629 union {
630 uint32_t u32;
631 uint64_t u64;
632 } val;
633 struct kvm_one_reg reg = {
634 .id = id,
635 .addr = (uintptr_t) &val,
637 int ret;
639 switch (id & KVM_REG_SIZE_MASK) {
640 case KVM_REG_SIZE_U32:
641 val.u32 = env->spr[spr];
642 break;
644 case KVM_REG_SIZE_U64:
645 val.u64 = env->spr[spr];
646 break;
648 default:
649 /* Don't handle this size yet */
650 abort();
653 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
654 if (ret != 0) {
655 trace_kvm_failed_spr_set(spr, strerror(errno));
659 static int kvm_put_fp(CPUState *cs)
661 PowerPCCPU *cpu = POWERPC_CPU(cs);
662 CPUPPCState *env = &cpu->env;
663 struct kvm_one_reg reg;
664 int i;
665 int ret;
667 if (env->insns_flags & PPC_FLOAT) {
668 uint64_t fpscr = env->fpscr;
669 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
671 reg.id = KVM_REG_PPC_FPSCR;
672 reg.addr = (uintptr_t)&fpscr;
673 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
674 if (ret < 0) {
675 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
676 return ret;
679 for (i = 0; i < 32; i++) {
680 uint64_t vsr[2];
682 #ifdef HOST_WORDS_BIGENDIAN
683 vsr[0] = float64_val(env->fpr[i]);
684 vsr[1] = env->vsr[i];
685 #else
686 vsr[0] = env->vsr[i];
687 vsr[1] = float64_val(env->fpr[i]);
688 #endif
689 reg.addr = (uintptr_t) &vsr;
690 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
692 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
693 if (ret < 0) {
694 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
695 i, strerror(errno));
696 return ret;
701 if (env->insns_flags & PPC_ALTIVEC) {
702 reg.id = KVM_REG_PPC_VSCR;
703 reg.addr = (uintptr_t)&env->vscr;
704 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
705 if (ret < 0) {
706 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
707 return ret;
710 for (i = 0; i < 32; i++) {
711 reg.id = KVM_REG_PPC_VR(i);
712 reg.addr = (uintptr_t)&env->avr[i];
713 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
714 if (ret < 0) {
715 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
716 return ret;
721 return 0;
724 static int kvm_get_fp(CPUState *cs)
726 PowerPCCPU *cpu = POWERPC_CPU(cs);
727 CPUPPCState *env = &cpu->env;
728 struct kvm_one_reg reg;
729 int i;
730 int ret;
732 if (env->insns_flags & PPC_FLOAT) {
733 uint64_t fpscr;
734 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
736 reg.id = KVM_REG_PPC_FPSCR;
737 reg.addr = (uintptr_t)&fpscr;
738 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
739 if (ret < 0) {
740 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
741 return ret;
742 } else {
743 env->fpscr = fpscr;
746 for (i = 0; i < 32; i++) {
747 uint64_t vsr[2];
749 reg.addr = (uintptr_t) &vsr;
750 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
752 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
753 if (ret < 0) {
754 DPRINTF("Unable to get %s%d from KVM: %s\n",
755 vsx ? "VSR" : "FPR", i, strerror(errno));
756 return ret;
757 } else {
758 #ifdef HOST_WORDS_BIGENDIAN
759 env->fpr[i] = vsr[0];
760 if (vsx) {
761 env->vsr[i] = vsr[1];
763 #else
764 env->fpr[i] = vsr[1];
765 if (vsx) {
766 env->vsr[i] = vsr[0];
768 #endif
773 if (env->insns_flags & PPC_ALTIVEC) {
774 reg.id = KVM_REG_PPC_VSCR;
775 reg.addr = (uintptr_t)&env->vscr;
776 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
777 if (ret < 0) {
778 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
779 return ret;
782 for (i = 0; i < 32; i++) {
783 reg.id = KVM_REG_PPC_VR(i);
784 reg.addr = (uintptr_t)&env->avr[i];
785 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
786 if (ret < 0) {
787 DPRINTF("Unable to get VR%d from KVM: %s\n",
788 i, strerror(errno));
789 return ret;
794 return 0;
797 #if defined(TARGET_PPC64)
798 static int kvm_get_vpa(CPUState *cs)
800 PowerPCCPU *cpu = POWERPC_CPU(cs);
801 CPUPPCState *env = &cpu->env;
802 struct kvm_one_reg reg;
803 int ret;
805 reg.id = KVM_REG_PPC_VPA_ADDR;
806 reg.addr = (uintptr_t)&env->vpa_addr;
807 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
808 if (ret < 0) {
809 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
810 return ret;
813 assert((uintptr_t)&env->slb_shadow_size
814 == ((uintptr_t)&env->slb_shadow_addr + 8));
815 reg.id = KVM_REG_PPC_VPA_SLB;
816 reg.addr = (uintptr_t)&env->slb_shadow_addr;
817 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
818 if (ret < 0) {
819 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
820 strerror(errno));
821 return ret;
824 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
825 reg.id = KVM_REG_PPC_VPA_DTL;
826 reg.addr = (uintptr_t)&env->dtl_addr;
827 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
828 if (ret < 0) {
829 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
830 strerror(errno));
831 return ret;
834 return 0;
837 static int kvm_put_vpa(CPUState *cs)
839 PowerPCCPU *cpu = POWERPC_CPU(cs);
840 CPUPPCState *env = &cpu->env;
841 struct kvm_one_reg reg;
842 int ret;
844 /* SLB shadow or DTL can't be registered unless a master VPA is
845 * registered. That means when restoring state, if a VPA *is*
846 * registered, we need to set that up first. If not, we need to
847 * deregister the others before deregistering the master VPA */
848 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
850 if (env->vpa_addr) {
851 reg.id = KVM_REG_PPC_VPA_ADDR;
852 reg.addr = (uintptr_t)&env->vpa_addr;
853 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
854 if (ret < 0) {
855 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
856 return ret;
860 assert((uintptr_t)&env->slb_shadow_size
861 == ((uintptr_t)&env->slb_shadow_addr + 8));
862 reg.id = KVM_REG_PPC_VPA_SLB;
863 reg.addr = (uintptr_t)&env->slb_shadow_addr;
864 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
865 if (ret < 0) {
866 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
867 return ret;
870 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
871 reg.id = KVM_REG_PPC_VPA_DTL;
872 reg.addr = (uintptr_t)&env->dtl_addr;
873 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
874 if (ret < 0) {
875 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
876 strerror(errno));
877 return ret;
880 if (!env->vpa_addr) {
881 reg.id = KVM_REG_PPC_VPA_ADDR;
882 reg.addr = (uintptr_t)&env->vpa_addr;
883 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
884 if (ret < 0) {
885 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
886 return ret;
890 return 0;
892 #endif /* TARGET_PPC64 */
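/* Write the Book3S segment state (PVR, SDR1, SLB, segment registers and
 * BATs) back to KVM. */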
894 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
896 CPUPPCState *env = &cpu->env;
897 struct kvm_sregs sregs;
898 int i;
900 sregs.pvr = env->spr[SPR_PVR];
902 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
904 /* Sync SLB */
905 #ifdef TARGET_PPC64
906 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
907 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
908 if (env->slb[i].esid & SLB_ESID_V) {
909 sregs.u.s.ppc64.slb[i].slbe |= i;
911 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
913 #endif
915 /* Sync SRs */
916 for (i = 0; i < 16; i++) {
917 sregs.u.s.ppc32.sr[i] = env->sr[i];
920 /* Sync BATs */
921 for (i = 0; i < 8; i++) {
922 /* Beware. We have to swap upper and lower bits here */
923 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
924 | env->DBAT[1][i];
925 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
926 | env->IBAT[1][i];
929 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
932 int kvm_arch_put_registers(CPUState *cs, int level)
934 PowerPCCPU *cpu = POWERPC_CPU(cs);
935 CPUPPCState *env = &cpu->env;
936 struct kvm_regs regs;
937 int ret;
938 int i;
940 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
941 if (ret < 0) {
942 return ret;
945 regs.ctr = env->ctr;
946 regs.lr = env->lr;
947 regs.xer = cpu_read_xer(env);
948 regs.msr = env->msr;
949 regs.pc = env->nip;
951 regs.srr0 = env->spr[SPR_SRR0];
952 regs.srr1 = env->spr[SPR_SRR1];
954 regs.sprg0 = env->spr[SPR_SPRG0];
955 regs.sprg1 = env->spr[SPR_SPRG1];
956 regs.sprg2 = env->spr[SPR_SPRG2];
957 regs.sprg3 = env->spr[SPR_SPRG3];
958 regs.sprg4 = env->spr[SPR_SPRG4];
959 regs.sprg5 = env->spr[SPR_SPRG5];
960 regs.sprg6 = env->spr[SPR_SPRG6];
961 regs.sprg7 = env->spr[SPR_SPRG7];
963 regs.pid = env->spr[SPR_BOOKE_PID];
965 for (i = 0;i < 32; i++)
966 regs.gpr[i] = env->gpr[i];
968 regs.cr = 0;
969 for (i = 0; i < 8; i++) {
970 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
973 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
974 if (ret < 0)
975 return ret;
977 kvm_put_fp(cs);
979 if (env->tlb_dirty) {
980 kvm_sw_tlb_put(cpu);
981 env->tlb_dirty = false;
984 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
985 ret = kvmppc_put_books_sregs(cpu);
986 if (ret < 0) {
987 return ret;
991 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
992 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
995 if (cap_one_reg) {
996 int i;
998 /* We deliberately ignore errors here, for kernels which have
999 * the ONE_REG calls, but don't support the specific
1000 * registers, there's a reasonable chance things will still
1001 * work, at least until we try to migrate. */
1002 for (i = 0; i < 1024; i++) {
1003 uint64_t id = env->spr_cb[i].one_reg_id;
1005 if (id != 0) {
1006 kvm_put_one_spr(cs, id, i);
1010 #ifdef TARGET_PPC64
1011 if (msr_ts) {
1012 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1013 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1015 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1016 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1018 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1019 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1020 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1021 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1022 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1023 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1024 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1025 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1026 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1027 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1030 if (cap_papr) {
1031 if (kvm_put_vpa(cs) < 0) {
1032 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1036 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1037 #endif /* TARGET_PPC64 */
1040 return ret;
1043 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1045 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1048 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1050 CPUPPCState *env = &cpu->env;
1051 struct kvm_sregs sregs;
1052 int ret;
1054 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1055 if (ret < 0) {
1056 return ret;
1059 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1060 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1061 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1062 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1063 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1064 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1065 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1066 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1067 env->spr[SPR_DECR] = sregs.u.e.dec;
1068 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1069 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1070 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1073 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1074 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1075 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1076 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1077 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1078 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1081 if (sregs.u.e.features & KVM_SREGS_E_64) {
1082 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1085 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1086 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1089 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1090 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1091 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1092 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1093 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1094 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1095 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1096 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1097 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1098 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1099 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1100 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1101 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1102 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1103 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1104 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1105 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1106 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1107 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1108 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1109 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1110 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1111 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1112 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1113 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1114 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1115 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1116 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1117 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1118 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1119 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1120 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1121 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1123 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1124 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1125 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1126 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1127 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1128 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1129 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1132 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1133 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1134 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1137 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1138 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1139 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1140 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1141 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1145 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1146 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1147 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1148 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1149 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1150 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1151 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1152 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1153 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1154 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1155 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1158 if (sregs.u.e.features & KVM_SREGS_EXP) {
1159 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1162 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1163 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1164 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1167 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1168 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1169 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1170 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1172 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1173 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1174 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1178 return 0;
1181 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1183 CPUPPCState *env = &cpu->env;
1184 struct kvm_sregs sregs;
1185 int ret;
1186 int i;
1188 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1189 if (ret < 0) {
1190 return ret;
1193 if (!env->external_htab) {
1194 ppc_store_sdr1(env, sregs.u.s.sdr1);
1197 /* Sync SLB */
1198 #ifdef TARGET_PPC64
1200 * The packed SLB array we get from KVM_GET_SREGS only contains
1201 * information about valid entries. So we flush our internal copy
1202 * to get rid of stale ones, then put all valid SLB entries back
1203 * in.
1205 memset(env->slb, 0, sizeof(env->slb));
1206 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1207 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1208 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1210 * Only restore valid entries
1212 if (rb & SLB_ESID_V) {
1213 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1216 #endif
1218 /* Sync SRs */
1219 for (i = 0; i < 16; i++) {
1220 env->sr[i] = sregs.u.s.ppc32.sr[i];
1223 /* Sync BATs */
1224 for (i = 0; i < 8; i++) {
1225 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1226 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1227 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1228 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1231 return 0;
1234 int kvm_arch_get_registers(CPUState *cs)
1236 PowerPCCPU *cpu = POWERPC_CPU(cs);
1237 CPUPPCState *env = &cpu->env;
1238 struct kvm_regs regs;
1239 uint32_t cr;
1240 int i, ret;
1242 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1243 if (ret < 0)
1244 return ret;
1246 cr = regs.cr;
1247 for (i = 7; i >= 0; i--) {
1248 env->crf[i] = cr & 15;
1249 cr >>= 4;
1252 env->ctr = regs.ctr;
1253 env->lr = regs.lr;
1254 cpu_write_xer(env, regs.xer);
1255 env->msr = regs.msr;
1256 env->nip = regs.pc;
1258 env->spr[SPR_SRR0] = regs.srr0;
1259 env->spr[SPR_SRR1] = regs.srr1;
1261 env->spr[SPR_SPRG0] = regs.sprg0;
1262 env->spr[SPR_SPRG1] = regs.sprg1;
1263 env->spr[SPR_SPRG2] = regs.sprg2;
1264 env->spr[SPR_SPRG3] = regs.sprg3;
1265 env->spr[SPR_SPRG4] = regs.sprg4;
1266 env->spr[SPR_SPRG5] = regs.sprg5;
1267 env->spr[SPR_SPRG6] = regs.sprg6;
1268 env->spr[SPR_SPRG7] = regs.sprg7;
1270 env->spr[SPR_BOOKE_PID] = regs.pid;
1272 for (i = 0;i < 32; i++)
1273 env->gpr[i] = regs.gpr[i];
1275 kvm_get_fp(cs);
1277 if (cap_booke_sregs) {
1278 ret = kvmppc_get_booke_sregs(cpu);
1279 if (ret < 0) {
1280 return ret;
1284 if (cap_segstate) {
1285 ret = kvmppc_get_books_sregs(cpu);
1286 if (ret < 0) {
1287 return ret;
1291 if (cap_hior) {
1292 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1295 if (cap_one_reg) {
1296 int i;
1298 /* We deliberately ignore errors here, for kernels which have
1299 * the ONE_REG calls, but don't support the specific
1300 * registers, there's a reasonable chance things will still
1301 * work, at least until we try to migrate. */
1302 for (i = 0; i < 1024; i++) {
1303 uint64_t id = env->spr_cb[i].one_reg_id;
1305 if (id != 0) {
1306 kvm_get_one_spr(cs, id, i);
1310 #ifdef TARGET_PPC64
1311 if (msr_ts) {
1312 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1313 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1315 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1316 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1318 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1319 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1320 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1321 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1322 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1323 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1324 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1325 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1326 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1327 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1330 if (cap_papr) {
1331 if (kvm_get_vpa(cs) < 0) {
1332 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1336 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1337 #endif
1340 return 0;
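/* Assert or deassert the external interrupt line via KVM_INTERRUPT; this
 * requires the KVM_CAP_PPC_UNSET_IRQ and KVM_CAP_PPC_IRQ_LEVEL capabilities. */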
1343 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1345 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1347 if (irq != PPC_INTERRUPT_EXT) {
1348 return 0;
1351 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1352 return 0;
1355 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1357 return 0;
1360 #if defined(TARGET_PPCEMB)
1361 #define PPC_INPUT_INT PPC40x_INPUT_INT
1362 #elif defined(TARGET_PPC64)
1363 #define PPC_INPUT_INT PPC970_INPUT_INT
1364 #else
1365 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1366 #endif
1368 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1370 PowerPCCPU *cpu = POWERPC_CPU(cs);
1371 CPUPPCState *env = &cpu->env;
1372 int r;
1373 unsigned irq;
1375 qemu_mutex_lock_iothread();
1377 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1378 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1379 if (!cap_interrupt_level &&
1380 run->ready_for_interrupt_injection &&
1381 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1382 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1384 /* For now KVM disregards the 'irq' argument. However, in the
1385 * future KVM could cache it in-kernel to avoid a heavyweight exit
1386 * when reading the UIC.
1388 irq = KVM_INTERRUPT_SET;
1390 DPRINTF("injected interrupt %d\n", irq);
1391 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1392 if (r < 0) {
1393 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1396 /* Always wake up soon in case the interrupt was level based */
1397 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1398 (NANOSECONDS_PER_SECOND / 50));
1401 /* We don't know if there are more interrupts pending after this. However,
1402 * the guest will return to userspace in the course of handling this one
1403 * anyways, so we will get a chance to deliver the rest. */
1405 qemu_mutex_unlock_iothread();
1408 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1410 return MEMTXATTRS_UNSPECIFIED;
1413 int kvm_arch_process_async_events(CPUState *cs)
1415 return cs->halted;
1418 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1420 CPUState *cs = CPU(cpu);
1421 CPUPPCState *env = &cpu->env;
1423 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1424 cs->halted = 1;
1425 cs->exception_index = EXCP_HLT;
1428 return 0;
1431 /* map dcr access to existing qemu dcr emulation */
1432 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1434 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1435 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1437 return 0;
1440 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1442 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1443 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1445 return 0;
1448 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1450 /* Mixed endian case is not handled */
1451 uint32_t sc = debug_inst_opcode;
1453 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1454 sizeof(sc), 0) ||
1455 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1456 return -EINVAL;
1459 return 0;
1462 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1464 uint32_t sc;
1466 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1467 sc != debug_inst_opcode ||
1468 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1469 sizeof(sc), 1)) {
1470 return -EINVAL;
1473 return 0;
1476 static int find_hw_breakpoint(target_ulong addr, int type)
1478 int n;
1480 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1481 <= ARRAY_SIZE(hw_debug_points));
1483 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1484 if (hw_debug_points[n].addr == addr &&
1485 hw_debug_points[n].type == type) {
1486 return n;
1490 return -1;
1493 static int find_hw_watchpoint(target_ulong addr, int *flag)
1495 int n;
1497 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1498 if (n >= 0) {
1499 *flag = BP_MEM_ACCESS;
1500 return n;
1503 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1504 if (n >= 0) {
1505 *flag = BP_MEM_WRITE;
1506 return n;
1509 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1510 if (n >= 0) {
1511 *flag = BP_MEM_READ;
1512 return n;
1515 return -1;
1518 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1519 target_ulong len, int type)
1521 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1522 return -ENOBUFS;
1525 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1526 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1528 switch (type) {
1529 case GDB_BREAKPOINT_HW:
1530 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1531 return -ENOBUFS;
1534 if (find_hw_breakpoint(addr, type) >= 0) {
1535 return -EEXIST;
1538 nb_hw_breakpoint++;
1539 break;
1541 case GDB_WATCHPOINT_WRITE:
1542 case GDB_WATCHPOINT_READ:
1543 case GDB_WATCHPOINT_ACCESS:
1544 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1545 return -ENOBUFS;
1548 if (find_hw_breakpoint(addr, type) >= 0) {
1549 return -EEXIST;
1552 nb_hw_watchpoint++;
1553 break;
1555 default:
1556 return -ENOSYS;
1559 return 0;
1562 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1563 target_ulong len, int type)
1565 int n;
1567 n = find_hw_breakpoint(addr, type);
1568 if (n < 0) {
1569 return -ENOENT;
1572 switch (type) {
1573 case GDB_BREAKPOINT_HW:
1574 nb_hw_breakpoint--;
1575 break;
1577 case GDB_WATCHPOINT_WRITE:
1578 case GDB_WATCHPOINT_READ:
1579 case GDB_WATCHPOINT_ACCESS:
1580 nb_hw_watchpoint--;
1581 break;
1583 default:
1584 return -ENOSYS;
1586 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1588 return 0;
1591 void kvm_arch_remove_all_hw_breakpoints(void)
1593 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1596 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1598 int n;
1600 /* Software Breakpoint updates */
1601 if (kvm_sw_breakpoints_active(cs)) {
1602 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1605 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1606 <= ARRAY_SIZE(hw_debug_points));
1607 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1609 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1610 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1611 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1612 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1613 switch (hw_debug_points[n].type) {
1614 case GDB_BREAKPOINT_HW:
1615 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1616 break;
1617 case GDB_WATCHPOINT_WRITE:
1618 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1619 break;
1620 case GDB_WATCHPOINT_READ:
1621 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1622 break;
1623 case GDB_WATCHPOINT_ACCESS:
1624 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1625 KVMPPC_DEBUG_WATCH_READ;
1626 break;
1627 default:
1628 cpu_abort(cs, "Unsupported breakpoint type\n");
1630 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
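/* Decide whether a KVM_EXIT_DEBUG was caused by single-stepping or by one
 * of QEMU's own breakpoints/watchpoints; anything else is reflected into
 * the guest as a program check (see the comment below). */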
1635 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1637 CPUState *cs = CPU(cpu);
1638 CPUPPCState *env = &cpu->env;
1639 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1640 int handle = 0;
1641 int n;
1642 int flag = 0;
1644 if (cs->singlestep_enabled) {
1645 handle = 1;
1646 } else if (arch_info->status) {
1647 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1648 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1649 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1650 if (n >= 0) {
1651 handle = 1;
1653 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1654 KVMPPC_DEBUG_WATCH_WRITE)) {
1655 n = find_hw_watchpoint(arch_info->address, &flag);
1656 if (n >= 0) {
1657 handle = 1;
1658 cs->watchpoint_hit = &hw_watchpoint;
1659 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1660 hw_watchpoint.flags = flag;
1664 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1665 handle = 1;
1666 } else {
1667 /* QEMU is not able to handle this debug exception, so inject
1668 * a program exception into the guest;
1669 * Yes, a program exception, NOT a debug exception !!
1670 * When QEMU is using the debug resources then the debug exception
1671 * must always be enabled. To achieve this we set MSR_DE and also
1672 * set MSRP_DEP so the guest cannot change MSR_DE.
1673 * When emulating debug resources for the guest we want the guest
1674 * to control MSR_DE (enable/disable debug interrupts as needed).
1675 * Supporting both configurations at once is NOT possible.
1676 * So the result is that we cannot share debug resources
1677 * between QEMU and the guest on the BookE architecture.
1678 * In the current design QEMU gets priority over the guest;
1679 * this means that if QEMU is using the debug resources then the
1680 * guest cannot use them.
1681 * For software breakpoints QEMU uses a privileged instruction,
1682 * so there is no way we are here because of a guest-triggered
1683 * debug exception; the only possibility is that the guest executed
1684 * a privileged / illegal instruction and that's why we are
1685 * injecting a program interrupt.
1688 cpu_synchronize_state(cs);
1689 /* env->nip is PC, so increment this by 4 to use
1690 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1692 env->nip += 4;
1693 cs->exception_index = POWERPC_EXCP_PROGRAM;
1694 env->error_code = POWERPC_EXCP_INVAL;
1695 ppc_cpu_do_interrupt(cs);
1698 return handle;
1701 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1703 PowerPCCPU *cpu = POWERPC_CPU(cs);
1704 CPUPPCState *env = &cpu->env;
1705 int ret;
1707 qemu_mutex_lock_iothread();
1709 switch (run->exit_reason) {
1710 case KVM_EXIT_DCR:
1711 if (run->dcr.is_write) {
1712 DPRINTF("handle dcr write\n");
1713 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1714 } else {
1715 DPRINTF("handle dcr read\n");
1716 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1718 break;
1719 case KVM_EXIT_HLT:
1720 DPRINTF("handle halt\n");
1721 ret = kvmppc_handle_halt(cpu);
1722 break;
1723 #if defined(TARGET_PPC64)
1724 case KVM_EXIT_PAPR_HCALL:
1725 DPRINTF("handle PAPR hypercall\n");
1726 run->papr_hcall.ret = spapr_hypercall(cpu,
1727 run->papr_hcall.nr,
1728 run->papr_hcall.args);
1729 ret = 0;
1730 break;
1731 #endif
1732 case KVM_EXIT_EPR:
1733 DPRINTF("handle epr\n");
1734 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1735 ret = 0;
1736 break;
1737 case KVM_EXIT_WATCHDOG:
1738 DPRINTF("handle watchdog expiry\n");
1739 watchdog_perform_action();
1740 ret = 0;
1741 break;
1743 case KVM_EXIT_DEBUG:
1744 DPRINTF("handle debug exception\n");
1745 if (kvm_handle_debug(cpu, run)) {
1746 ret = EXCP_DEBUG;
1747 break;
1749 /* re-enter, this exception was guest-internal */
1750 ret = 0;
1751 break;
1753 default:
1754 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1755 ret = -1;
1756 break;
1759 qemu_mutex_unlock_iothread();
1760 return ret;
1763 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1765 CPUState *cs = CPU(cpu);
1766 uint32_t bits = tsr_bits;
1767 struct kvm_one_reg reg = {
1768 .id = KVM_REG_PPC_OR_TSR,
1769 .addr = (uintptr_t) &bits,
1772 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1775 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1778 CPUState *cs = CPU(cpu);
1779 uint32_t bits = tsr_bits;
1780 struct kvm_one_reg reg = {
1781 .id = KVM_REG_PPC_CLEAR_TSR,
1782 .addr = (uintptr_t) &bits,
1785 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1788 int kvmppc_set_tcr(PowerPCCPU *cpu)
1790 CPUState *cs = CPU(cpu);
1791 CPUPPCState *env = &cpu->env;
1792 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1794 struct kvm_one_reg reg = {
1795 .id = KVM_REG_PPC_TCR,
1796 .addr = (uintptr_t) &tcr,
1799 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1802 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1804 CPUState *cs = CPU(cpu);
1805 int ret;
1807 if (!kvm_enabled()) {
1808 return -1;
1811 if (!cap_ppc_watchdog) {
1812 printf("warning: KVM does not support watchdog");
1813 return -1;
1816 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1817 if (ret < 0) {
1818 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1819 __func__, strerror(-ret));
1820 return ret;
1823 return ret;
1826 static int read_cpuinfo(const char *field, char *value, int len)
1828 FILE *f;
1829 int ret = -1;
1830 int field_len = strlen(field);
1831 char line[512];
1833 f = fopen("/proc/cpuinfo", "r");
1834 if (!f) {
1835 return -1;
1838 do {
1839 if (!fgets(line, sizeof(line), f)) {
1840 break;
1842 if (!strncmp(line, field, field_len)) {
1843 pstrcpy(value, len, line);
1844 ret = 0;
1845 break;
1847 } while(*line);
1849 fclose(f);
1851 return ret;
1854 uint32_t kvmppc_get_tbfreq(void)
1856 char line[512];
1857 char *ns;
1858 uint32_t retval = NANOSECONDS_PER_SECOND;
1860 if (read_cpuinfo("timebase", line, sizeof(line))) {
1861 return retval;
1864 if (!(ns = strchr(line, ':'))) {
1865 return retval;
1868 ns++;
1870 return atoi(ns);
1873 bool kvmppc_get_host_serial(char **value)
1875 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1876 NULL);
1879 bool kvmppc_get_host_model(char **value)
1881 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1884 /* Try to find a device tree node for a CPU with clock-frequency property */
1885 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1887 struct dirent *dirp;
1888 DIR *dp;
1890 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1891 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1892 return -1;
1895 buf[0] = '\0';
1896 while ((dirp = readdir(dp)) != NULL) {
1897 FILE *f;
1898 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1899 dirp->d_name);
1900 f = fopen(buf, "r");
1901 if (f) {
1902 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1903 fclose(f);
1904 break;
1906 buf[0] = '\0';
1908 closedir(dp);
1909 if (buf[0] == '\0') {
1910 printf("Unknown host!\n");
1911 return -1;
1914 return 0;
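/* Read a device tree property file as a single big-endian 32-bit or
 * 64-bit integer. */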
1917 static uint64_t kvmppc_read_int_dt(const char *filename)
1919 union {
1920 uint32_t v32;
1921 uint64_t v64;
1922 } u;
1923 FILE *f;
1924 int len;
1926 f = fopen(filename, "rb");
1927 if (!f) {
1928 return -1;
1931 len = fread(&u, 1, sizeof(u), f);
1932 fclose(f);
1933 switch (len) {
1934 case 4:
1935 /* property is a 32-bit quantity */
1936 return be32_to_cpu(u.v32);
1937 case 8:
1938 return be64_to_cpu(u.v64);
1941 return 0;
1944 /* Read a CPU node property from the host device tree that's a single
1945 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1946 * (can't find or open the property, or doesn't understand the
1947 * format) */
1948 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1950 char buf[PATH_MAX], *tmp;
1951 uint64_t val;
1953 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1954 return -1;
1957 tmp = g_strdup_printf("%s/%s", buf, propname);
1958 val = kvmppc_read_int_dt(tmp);
1959 g_free(tmp);
1961 return val;
1964 uint64_t kvmppc_get_clockfreq(void)
1966 return kvmppc_read_int_cpu_dt("clock-frequency");
1969 uint32_t kvmppc_get_vmx(void)
1971 return kvmppc_read_int_cpu_dt("ibm,vmx");
1974 uint32_t kvmppc_get_dfp(void)
1976 return kvmppc_read_int_cpu_dt("ibm,dfp");
1979 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1981 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1982 CPUState *cs = CPU(cpu);
1984 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1985 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1986 return 0;
1989 return 1;
1992 int kvmppc_get_hasidle(CPUPPCState *env)
1994 struct kvm_ppc_pvinfo pvinfo;
1996 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1997 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1998 return 1;
2001 return 0;
2004 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2006 uint32_t *hc = (uint32_t*)buf;
2007 struct kvm_ppc_pvinfo pvinfo;
2009 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2010 memcpy(buf, pvinfo.hcall, buf_len);
2011 return 0;
2015 * Fallback to always fail hypercalls regardless of endianness:
2017 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2018 * li r3, -1
2019 * b .+8 (becomes nop in wrong endian)
2020 * bswap32(li r3, -1)
2023 hc[0] = cpu_to_be32(0x08000048);
2024 hc[1] = cpu_to_be32(0x3860ffff);
2025 hc[2] = cpu_to_be32(0x48000008);
2026 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2028 return 1;
2031 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2033 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2036 void kvmppc_enable_logical_ci_hcalls(void)
2039 * FIXME: it would be nice if we could detect the cases where
2040 * we're using a device which requires the in kernel
2041 * implementation of these hcalls, but the kernel lacks them and
2042 * produce a warning.
2044 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2045 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2048 void kvmppc_enable_set_mode_hcall(void)
2050 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2053 void kvmppc_set_papr(PowerPCCPU *cpu)
2055 CPUState *cs = CPU(cpu);
2056 int ret;
2058 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2059 if (ret) {
2060 error_report("This vCPU type or KVM version does not support PAPR");
2061 exit(1);
2064 /* Update the capability flag so we sync the right information
2065 * with kvm */
2066 cap_papr = 1;
2069 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2071 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2074 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2076 CPUState *cs = CPU(cpu);
2077 int ret;
2079 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2080 if (ret && mpic_proxy) {
2081 error_report("This KVM version does not support EPR");
2082 exit(1);
2086 int kvmppc_smt_threads(void)
2088 return cap_ppc_smt ? cap_ppc_smt : 1;
2091 #ifdef TARGET_PPC64
2092 off_t kvmppc_alloc_rma(void **rma)
2094 off_t size;
2095 int fd;
2096 struct kvm_allocate_rma ret;
2098 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2099 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2100 * not necessary on this hardware
2101 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2103 * FIXME: We should allow the user to force contiguous RMA
2104 * allocation in the cap_ppc_rma==1 case.
2106 if (cap_ppc_rma < 2) {
2107 return 0;
2110 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2111 if (fd < 0) {
2112 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2113 strerror(errno));
2114 return -1;
2117 size = MIN(ret.rma_size, 256ul << 20);
2119 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2120 if (*rma == MAP_FAILED) {
2121 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2122 return -1;
2125 return size;
2128 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2130 struct kvm_ppc_smmu_info info;
2131 long rampagesize, best_page_shift;
2132 int i;
2134 if (cap_ppc_rma >= 2) {
2135 return current_size;
2138 /* Find the largest hardware supported page size that's less than
2139 * or equal to the (logical) backing page size of guest RAM */
2140 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2141 rampagesize = getrampagesize();
2142 best_page_shift = 0;
2144 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2145 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2147 if (!sps->page_shift) {
2148 continue;
2151 if ((sps->page_shift > best_page_shift)
2152 && ((1UL << sps->page_shift) <= rampagesize)) {
2153 best_page_shift = sps->page_shift;
2157 return MIN(current_size,
2158 1ULL << (best_page_shift + hash_shift - 7));
2160 #endif
2162 bool kvmppc_spapr_use_multitce(void)
2164 return cap_spapr_multitce;
2167 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2168 bool need_vfio)
2170 struct kvm_create_spapr_tce args = {
2171 .liobn = liobn,
2172 .window_size = window_size,
2174 long len;
2175 int fd;
2176 void *table;
2178 /* Must set fd to -1 so we don't try to munmap when called for
2179 * destroying the table, which the upper layers -will- do
2181 *pfd = -1;
2182 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2183 return NULL;
2186 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2187 if (fd < 0) {
2188 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2189 liobn);
2190 return NULL;
2193 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2194 /* FIXME: round this up to page size */
2196 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2197 if (table == MAP_FAILED) {
2198 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2199 liobn);
2200 close(fd);
2201 return NULL;
2204 *pfd = fd;
2205 return table;
2208 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2210 long len;
2212 if (fd < 0) {
2213 return -1;
2216 len = nb_table * sizeof(uint64_t);
2217 if ((munmap(table, len) < 0) ||
2218 (close(fd) < 0)) {
2219 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2220 strerror(errno));
2221 /* Leak the table */
2224 return 0;
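/*
 * Ask the kernel to (re)allocate the guest hash page table.  The return
 * value encodes who owns the HTAB: 0 means QEMU must allocate it itself
 * (full emulation or PR KVM), a positive value is the log2 size of the
 * table the kernel has allocated, and a negative value is an error.
 */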
2227 int kvmppc_reset_htab(int shift_hint)
2229 uint32_t shift = shift_hint;
2231 if (!kvm_enabled()) {
2232 /* Full emulation, tell caller to allocate htab itself */
2233 return 0;
2235 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2236 int ret;
2237 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2238 if (ret == -ENOTTY) {
2239 /* At least some versions of PR KVM advertise the
2240 * capability, but don't implement the ioctl(). Oops.
2241 * Return 0 so that we allocate the htab in qemu, as is
2242 * correct for PR. */
2243 return 0;
2244 } else if (ret < 0) {
2245 return ret;
2247 return shift;
2250 /* We have a kernel that predates the htab reset calls.  For PR
2251 * KVM, we need to allocate the htab ourselves; an HV KVM of
2252 * this era has already allocated a fixed 16MB hash table.
2253 * Kernels of this era advertise the GET_PVINFO capability
2254 * only on PR, so we use that as a hack to determine the right
2255 * answer. */
2256 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2257 /* PR - tell caller to allocate htab */
2258 return 0;
2259 } else {
2260 /* HV - assume 16MB kernel allocated htab */
2261 return 24;
2265 static inline uint32_t mfpvr(void)
2267 uint32_t pvr;
2269 asm ("mfpvr %0"
2270 : "=r"(pvr));
2271 return pvr;
2274 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2276 if (on) {
2277 *word |= flags;
2278 } else {
2279 *word &= ~flags;
2283 static void kvmppc_host_cpu_initfn(Object *obj)
2285 assert(kvm_enabled());
2288 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2290 DeviceClass *dc = DEVICE_CLASS(oc);
2291 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2292 uint32_t vmx = kvmppc_get_vmx();
2293 uint32_t dfp = kvmppc_get_dfp();
2294 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2295 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2297 /* Now fix up the class with information we can query from the host */
2298 pcc->pvr = mfpvr();
2300 if (vmx != -1) {
2301 /* Only override when we know what the host supports */
2302 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2303 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2305 if (dfp != -1) {
2306 /* Only override when we know what the host supports */
2307 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2310 if (dcache_size != -1) {
2311 pcc->l1_dcache_size = dcache_size;
2314 if (icache_size != -1) {
2315 pcc->l1_icache_size = icache_size;
2318 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2319 dc->cannot_destroy_with_object_finalize_yet = true;
2322 bool kvmppc_has_cap_epr(void)
2324 return cap_epr;
2327 bool kvmppc_has_cap_htab_fd(void)
2329 return cap_htab_fd;
2332 bool kvmppc_has_cap_fixup_hcalls(void)
2334 return cap_fixup_hcalls;
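/*
 * Walk up the QOM class hierarchy until the first abstract ancestor.  For
 * PowerPC CPU classes that ancestor is the CPU family class (for example a
 * specific POWER8 revision resolving to its POWER8 family class), which is
 * what the generic alias registration further below is based on.
 */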
2337 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2339 ObjectClass *oc = OBJECT_CLASS(pcc);
2341 while (oc && !object_class_is_abstract(oc)) {
2342 oc = object_class_get_parent(oc);
2344 assert(oc);
2346 return POWERPC_CPU_CLASS(oc);
2349 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2351 uint32_t host_pvr = mfpvr();
2352 PowerPCCPUClass *pvr_pcc;
2354 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2355 if (pvr_pcc == NULL) {
2356 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2359 return pvr_pcc;
2362 #if defined(TARGET_PPC64)
2363 static void spapr_cpu_core_host_initfn(Object *obj)
2365 sPAPRCPUCore *core = SPAPR_CPU_CORE(obj);
2366 char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host");
2367 ObjectClass *oc = object_class_by_name(name);
2369 g_assert(oc);
2370 g_free((void *)name);
2371 core->cpu_class = oc;
2373 #endif
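/*
 * Register the "host" CPU type, whose class is fixed up from information
 * queried from the real host (PVR, cache sizes, AltiVec/VSX/DFP support).
 * On 64-bit targets a matching "host" sPAPR CPU core type is registered as
 * well, and finally a generic alias type is registered under the CPU
 * family's name; the exact alias string comes from dc->desc and is not
 * spelled out here.
 */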
2375 static int kvm_ppc_register_host_cpu_type(void)
2377 TypeInfo type_info = {
2378 .name = TYPE_HOST_POWERPC_CPU,
2379 .instance_init = kvmppc_host_cpu_initfn,
2380 .class_init = kvmppc_host_cpu_class_init,
2382 PowerPCCPUClass *pvr_pcc;
2383 DeviceClass *dc;
2385 pvr_pcc = kvm_ppc_get_host_cpu_class();
2386 if (pvr_pcc == NULL) {
2387 return -1;
2389 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2390 type_register(&type_info);
2392 #if defined(TARGET_PPC64)
2393 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2394 type_info.parent = TYPE_SPAPR_CPU_CORE;
2395 type_info.instance_size = sizeof(sPAPRCPUCore);
2396 type_info.instance_init = spapr_cpu_core_host_initfn;
2397 type_info.class_init = NULL;
2398 type_register(&type_info);
2399 g_free((void *)type_info.name);
2400 type_info.instance_size = 0;
2401 type_info.instance_init = NULL;
2402 #endif
2404 /* Also register a generic CPU class for this CPU's family */
2405 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2406 dc = DEVICE_CLASS(pvr_pcc);
2407 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2408 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2409 type_register(&type_info);
2411 return 0;
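/*
 * Tell KVM to handle the named RTAS call in the kernel under the given
 * token.  Returns -ENOENT when the kernel lacks KVM_CAP_PPC_RTAS.  Note
 * that the strncpy() below does not guarantee NUL termination if the name
 * fills args.name completely; RTAS call names are short in practice, and
 * the field size is fixed by the kernel ABI.
 */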
2414 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2416 struct kvm_rtas_token_args args = {
2417 .token = token,
2420 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2421 return -ENOENT;
2424 strncpy(args.name, function, sizeof(args.name));
2426 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2429 int kvmppc_get_htab_fd(bool write)
2431 struct kvm_get_htab_fd s = {
2432 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2433 .start_index = 0,
2436 if (!cap_htab_fd) {
2437 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2438 return -1;
2441 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
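/*
 * Stream hash table entries from the kernel's HTAB fd into the migration
 * stream.  Each kvm_get_htab_header read from the fd is re-encoded as a
 * big-endian record: a 32-bit index, 16-bit n_valid and n_invalid counts,
 * then n_valid * HASH_PTE_SIZE_64 bytes of PTE data.  Returns 1 when the
 * kernel reports the end of the table, 0 when the time budget (max_ns)
 * ran out first, or a negative value on read errors.
 */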
2444 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2446 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2447 uint8_t buf[bufsize];
2448 ssize_t rc;
2450 do {
2451 rc = read(fd, buf, bufsize);
2452 if (rc < 0) {
2453 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2454 strerror(errno));
2455 return rc;
2456 } else if (rc) {
2457 uint8_t *buffer = buf;
2458 ssize_t n = rc;
2459 while (n) {
2460 struct kvm_get_htab_header *head =
2461 (struct kvm_get_htab_header *) buffer;
2462 size_t chunksize = sizeof(*head) +
2463 HASH_PTE_SIZE_64 * head->n_valid;
2465 qemu_put_be32(f, head->index);
2466 qemu_put_be16(f, head->n_valid);
2467 qemu_put_be16(f, head->n_invalid);
2468 qemu_put_buffer(f, (void *)(head + 1),
2469 HASH_PTE_SIZE_64 * head->n_valid);
2471 buffer += chunksize;
2472 n -= chunksize;
2475 } while ((rc != 0)
2476 && ((max_ns < 0)
2477 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2479 return (rc == 0) ? 1 : 0;
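/*
 * Counterpart of the save path above: rebuild one kvm_get_htab_header
 * record (index, n_valid, n_invalid, plus n_valid PTEs pulled from the
 * migration stream) and write it back to the kernel through the HTAB fd.
 * A short write is treated as an error, since a chunk must be written as
 * a whole.
 */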
2482 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2483 uint16_t n_valid, uint16_t n_invalid)
2485 struct kvm_get_htab_header *buf;
2486 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2487 ssize_t rc;
2489 buf = alloca(chunksize);
2490 buf->index = index;
2491 buf->n_valid = n_valid;
2492 buf->n_invalid = n_invalid;
2494 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2496 rc = write(fd, buf, chunksize);
2497 if (rc < 0) {
2498 fprintf(stderr, "Error writing KVM hash table: %s\n",
2499 strerror(errno));
2500 return rc;
2502 if (rc != chunksize) {
2503 /* We should never get a short write on a single chunk */
2504 fprintf(stderr, "Short write, restoring KVM hash table\n");
2505 return -1;
2507 return 0;
2510 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2512 return true;
2515 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2517 return 1;
2520 int kvm_arch_on_sigbus(int code, void *addr)
2522 return 1;
2525 void kvm_arch_init_irq_routing(KVMState *s)
2529 struct kvm_get_htab_buf {
2530 struct kvm_get_htab_header header;
2532 * We require one extra entry of slack for the read
2534 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
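/*
 * Read a whole PTEG (hash PTE group) starting at pte_index.  The code below
 * opens a throw-away HTAB fd positioned at the requested index, reads the
 * group into a heap-allocated kvm_get_htab_buf and returns the address of
 * its hpte[] array as an opaque token; the caller releases it with
 * kvmppc_hash64_free_pteg(), which recovers the enclosing buffer via
 * container_of().
 */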
2537 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2539 int htab_fd;
2540 struct kvm_get_htab_fd ghf;
2541 struct kvm_get_htab_buf *hpte_buf;
2543 ghf.flags = 0;
2544 ghf.start_index = pte_index;
2545 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2546 if (htab_fd < 0) {
2547 goto error_out;
2550 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2552 * Read the hpte group
2554 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2555 goto out_close;
2558 close(htab_fd);
2559 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2561 out_close:
2562 g_free(hpte_buf);
2563 close(htab_fd);
2564 error_out:
2565 return 0;
2568 void kvmppc_hash64_free_pteg(uint64_t token)
2570 struct kvm_get_htab_buf *htab_buf;
2572 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2573 hpte);
2574 g_free(htab_buf);
2575 return;
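/*
 * Update a single hash PTE through the HTAB fd write interface: a header
 * describing one valid entry at pte_index is built on the stack, followed
 * by the two doublewords of the PTE, and written back to the kernel.
 * Errors are not propagated beyond closing the fd.
 */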
2578 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2579 target_ulong pte0, target_ulong pte1)
2581 int htab_fd;
2582 struct kvm_get_htab_fd ghf;
2583 struct kvm_get_htab_buf hpte_buf;
2585 ghf.flags = 0;
2586 ghf.start_index = 0; /* Ignored */
2587 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2588 if (htab_fd < 0) {
2589 goto error_out;
2592 hpte_buf.header.n_valid = 1;
2593 hpte_buf.header.n_invalid = 0;
2594 hpte_buf.header.index = pte_index;
2595 hpte_buf.hpte[0] = pte0;
2596 hpte_buf.hpte[1] = pte1;
2598 * Write the hpte entry.
2599 * CAUTION: write() has the warn_unused_result attribute, so we
2600 * must check its return value even though we can't do much on failure.
2602 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2603 goto out_close;
2606 out_close:
2607 close(htab_fd);
2608 return;
2610 error_out:
2611 return;
2614 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2615 uint64_t address, uint32_t data, PCIDevice *dev)
2617 return 0;
2620 int kvm_arch_msi_data_to_gsi(uint32_t data)
2622 return data & 0xffff;
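/*
 * If the host kernel exposes a hardware RNG (KVM_CAP_PPC_HWRNG), route the
 * guest's H_RANDOM hypercall to the in-kernel implementation.  Returns -1
 * when that is not possible, so the caller can fall back to handling the
 * hypercall in userspace.
 */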
2625 int kvmppc_enable_hwrng(void)
2627 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2628 return -1;
2631 return kvmppc_enable_hcall(kvm_state, H_RANDOM);