qemu/ar7.git: target-ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/error-report.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "kvm_ppc.h"
31 #include "cpu.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
41 #include "trace.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
46 //#define DEBUG_KVM
48 #ifdef DEBUG_KVM
49 #define DPRINTF(fmt, ...) \
50 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
51 #else
52 #define DPRINTF(fmt, ...) \
53 do { } while (0)
54 #endif
56 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
58 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
59 KVM_CAP_LAST_INFO
62 static int cap_interrupt_unset = false;
63 static int cap_interrupt_level = false;
64 static int cap_segstate;
65 static int cap_booke_sregs;
66 static int cap_ppc_smt;
67 static int cap_ppc_rma;
68 static int cap_spapr_tce;
69 static int cap_spapr_multitce;
70 static int cap_spapr_vfio;
71 static int cap_hior;
72 static int cap_one_reg;
73 static int cap_epr;
74 static int cap_ppc_watchdog;
75 static int cap_papr;
76 static int cap_htab_fd;
77 static int cap_fixup_hcalls;
79 static uint32_t debug_inst_opcode;
81 /* XXX We have a race condition where we actually have a level triggered
82 * interrupt, but the infrastructure can't expose that yet, so the guest
83 * takes the interrupt but ignores it, goes to sleep and never gets notified
84 * that there's still an interrupt pending.
86 * As a quick workaround, let's just wake up again 20 ms after we injected
87 * an interrupt. That way we can ensure that we're always reinjecting
88 * interrupts in case the guest swallowed them.
90 static QEMUTimer *idle_timer;
92 static void kvm_kick_cpu(void *opaque)
94 PowerPCCPU *cpu = opaque;
96 qemu_cpu_kick(CPU(cpu));
99 static int kvm_ppc_register_host_cpu_type(void);
101 int kvm_arch_init(MachineState *ms, KVMState *s)
103 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
104 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
105 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
106 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
107 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
108 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
109 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
110 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
111 cap_spapr_vfio = false;
112 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
113 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
114 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
115 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
116 /* Note: we don't set cap_papr here, because this capability is
117 * only activated after this by kvmppc_set_papr() */
118 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
119 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
121 if (!cap_interrupt_level) {
122 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
123 "VM to stall at times!\n");
126 kvm_ppc_register_host_cpu_type();
128 return 0;
131 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
133 CPUPPCState *cenv = &cpu->env;
134 CPUState *cs = CPU(cpu);
135 struct kvm_sregs sregs;
136 int ret;
138 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
139 /* What we're really trying to say is "if we're on BookE, we use
140 the native PVR for now". This is the only sane way to check
141 it though, so we potentially mislead users into thinking they can run
142 BookE guests on BookS. Let's hope nobody dares enough :) */
143 return 0;
144 } else {
145 if (!cap_segstate) {
146 fprintf(stderr, "kvm error: missing PVR setting capability\n");
147 return -ENOSYS;
151 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
152 if (ret) {
153 return ret;
156 sregs.pvr = cenv->spr[SPR_PVR];
157 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
160 /* Set up a shared TLB array with KVM */
161 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
163 CPUPPCState *env = &cpu->env;
164 CPUState *cs = CPU(cpu);
165 struct kvm_book3e_206_tlb_params params = {};
166 struct kvm_config_tlb cfg = {};
167 unsigned int entries = 0;
168 int ret, i;
170 if (!kvm_enabled() ||
171 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
172 return 0;
175 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
177 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
178 params.tlb_sizes[i] = booke206_tlb_size(env, i);
179 params.tlb_ways[i] = booke206_tlb_ways(env, i);
180 entries += params.tlb_sizes[i];
183 assert(entries == env->nb_tlb);
184 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
186 env->tlb_dirty = true;
188 cfg.array = (uintptr_t)env->tlb.tlbm;
189 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
190 cfg.params = (uintptr_t)&params;
191 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
193 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
194 if (ret < 0) {
195 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
196 __func__, strerror(-ret));
197 return ret;
200 env->kvm_sw_tlb = true;
201 return 0;
205 #if defined(TARGET_PPC64)
206 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
207 struct kvm_ppc_smmu_info *info)
209 CPUPPCState *env = &cpu->env;
210 CPUState *cs = CPU(cpu);
212 memset(info, 0, sizeof(*info));
214 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
215 * we need to "guess" what the supported page sizes are.
217 * For that to work we make a few assumptions:
219 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
220 * KVM which only supports 4K and 16M pages, but supports them
221 * regardless of the backing store characteristics. We also don't
222 * support 1T segments.
224 * This is safe as if HV KVM ever supports that capability or PR
225 * KVM grows support for more page/segment sizes, those versions
226 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
227 * will not hit this fallback
229 * - Else we are running HV KVM. This means we only support page
230 * sizes that fit in the backing store. Additionally we only
231 * advertise 64K pages if the processor is ARCH 2.06 and we assume
232 * P7 encodings for the SLB and hash table. Here too, we assume
233 * support for any newer processor will mean a kernel that
234 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
235 * this fallback.
237 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
238 /* No flags */
239 info->flags = 0;
240 info->slb_size = 64;
242 /* Standard 4k base page size segment */
243 info->sps[0].page_shift = 12;
244 info->sps[0].slb_enc = 0;
245 info->sps[0].enc[0].page_shift = 12;
246 info->sps[0].enc[0].pte_enc = 0;
248 /* Standard 16M large page size segment */
249 info->sps[1].page_shift = 24;
250 info->sps[1].slb_enc = SLB_VSID_L;
251 info->sps[1].enc[0].page_shift = 24;
252 info->sps[1].enc[0].pte_enc = 0;
253 } else {
254 int i = 0;
256 /* HV KVM has backing store size restrictions */
257 info->flags = KVM_PPC_PAGE_SIZES_REAL;
259 if (env->mmu_model & POWERPC_MMU_1TSEG) {
260 info->flags |= KVM_PPC_1T_SEGMENTS;
263 if (env->mmu_model == POWERPC_MMU_2_06 ||
264 env->mmu_model == POWERPC_MMU_2_07) {
265 info->slb_size = 32;
266 } else {
267 info->slb_size = 64;
270 /* Standard 4k base page size segment */
271 info->sps[i].page_shift = 12;
272 info->sps[i].slb_enc = 0;
273 info->sps[i].enc[0].page_shift = 12;
274 info->sps[i].enc[0].pte_enc = 0;
275 i++;
277 /* 64K on MMU 2.06 and later */
278 if (env->mmu_model == POWERPC_MMU_2_06 ||
279 env->mmu_model == POWERPC_MMU_2_07) {
280 info->sps[i].page_shift = 16;
281 info->sps[i].slb_enc = 0x110;
282 info->sps[i].enc[0].page_shift = 16;
283 info->sps[i].enc[0].pte_enc = 1;
284 i++;
287 /* Standard 16M large page size segment */
288 info->sps[i].page_shift = 24;
289 info->sps[i].slb_enc = SLB_VSID_L;
290 info->sps[i].enc[0].page_shift = 24;
291 info->sps[i].enc[0].pte_enc = 0;
295 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
297 CPUState *cs = CPU(cpu);
298 int ret;
300 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
301 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
302 if (ret == 0) {
303 return;
307 kvm_get_fallback_smmu_info(cpu, info);
310 static long gethugepagesize(const char *mem_path)
312 struct statfs fs;
313 int ret;
315 do {
316 ret = statfs(mem_path, &fs);
317 } while (ret != 0 && errno == EINTR);
319 if (ret != 0) {
320 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
321 strerror(errno));
322 exit(1);
325 #define HUGETLBFS_MAGIC 0x958458f6
327 if (fs.f_type != HUGETLBFS_MAGIC) {
328 /* Explicit mempath, but it's ordinary pages */
329 return getpagesize();
332 /* It's a hugepage mount, return the huge page size */
333 return fs.f_bsize;
336 static int find_max_supported_pagesize(Object *obj, void *opaque)
338 char *mem_path;
339 long *hpsize_min = opaque;
341 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
342 mem_path = object_property_get_str(obj, "mem-path", NULL);
343 if (mem_path) {
344 long hpsize = gethugepagesize(mem_path);
345 if (hpsize < *hpsize_min) {
346 *hpsize_min = hpsize;
348 } else {
349 *hpsize_min = getpagesize();
353 return 0;
356 static long getrampagesize(void)
358 long hpsize = LONG_MAX;
359 Object *memdev_root;
361 if (mem_path) {
362 return gethugepagesize(mem_path);
365 /* it's possible we have memory-backend objects with
366 * hugepage-backed RAM. these may get mapped into system
367 * address space via -numa parameters or memory hotplug
368 * hooks. we want to take these into account, but we
369 * also want to make sure these supported hugepage
370 * sizes are applicable across the entire range of memory
371 * we may boot from, so we take the min across all
372 * backends, and assume normal pages in cases where a
373 * backend isn't backed by hugepages.
375 memdev_root = object_resolve_path("/objects", NULL);
376 if (!memdev_root) {
377 return getpagesize();
380 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
382 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
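/* A page size is only usable under HV KVM (KVM_PPC_PAGE_SIZES_REAL) when it
 * does not exceed the page size backing guest RAM; PR KVM has no such
 * restriction. That is exactly what the helper below checks. */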
385 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
387 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
388 return true;
391 return (1ul << shift) <= rampgsize;
394 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
396 static struct kvm_ppc_smmu_info smmu_info;
397 static bool has_smmu_info;
398 CPUPPCState *env = &cpu->env;
399 long rampagesize;
400 int iq, ik, jq, jk;
402 /* We only handle page sizes for 64-bit server guests for now */
403 if (!(env->mmu_model & POWERPC_MMU_64)) {
404 return;
407 /* Collect MMU info from kernel if not already */
408 if (!has_smmu_info) {
409 kvm_get_smmu_info(cpu, &smmu_info);
410 has_smmu_info = true;
413 rampagesize = getrampagesize();
415 /* Convert to QEMU form */
416 memset(&env->sps, 0, sizeof(env->sps));
418 /* If we have HV KVM, we need to forbid CI large pages if our
419 * host page size is smaller than 64K.
421 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
422 env->ci_large_pages = getpagesize() >= 0x10000;
426 * XXX This loop should be an entry wide AND of the capabilities that
427 * the selected CPU has with the capabilities that KVM supports.
429 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
430 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
431 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
433 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
434 ksps->page_shift)) {
435 continue;
437 qsps->page_shift = ksps->page_shift;
438 qsps->slb_enc = ksps->slb_enc;
439 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
440 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
441 ksps->enc[jk].page_shift)) {
442 continue;
444 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
445 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
446 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
447 break;
450 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
451 break;
454 env->slb_nr = smmu_info.slb_size;
455 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
456 env->mmu_model &= ~POWERPC_MMU_1TSEG;
459 #else /* defined (TARGET_PPC64) */
461 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
465 #endif /* !defined (TARGET_PPC64) */
467 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
469 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
472 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
473 * book3s supports only 1 watchpoint, so an array size
474 * of 4 is sufficient for now.
476 #define MAX_HW_BKPTS 4
478 static struct HWBreakpoint {
479 target_ulong addr;
480 int type;
481 } hw_debug_points[MAX_HW_BKPTS];
483 static CPUWatchpoint hw_watchpoint;
485 /* By default there are no breakpoints or watchpoints supported */
486 static int max_hw_breakpoint;
487 static int max_hw_watchpoint;
488 static int nb_hw_breakpoint;
489 static int nb_hw_watchpoint;
491 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
493 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
494 max_hw_breakpoint = 2;
495 max_hw_watchpoint = 2;
498 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
499 fprintf(stderr, "Error initializing h/w breakpoints\n");
500 return;
504 int kvm_arch_init_vcpu(CPUState *cs)
506 PowerPCCPU *cpu = POWERPC_CPU(cs);
507 CPUPPCState *cenv = &cpu->env;
508 int ret;
510 /* Gather server mmu info from KVM and update the CPU state */
511 kvm_fixup_page_sizes(cpu);
513 /* Synchronize sregs with kvm */
514 ret = kvm_arch_sync_sregs(cpu);
515 if (ret) {
516 if (ret == -EINVAL) {
517 error_report("Register sync failed... If you're using kvm-hv.ko,"
518 " only \"-cpu host\" is possible");
520 return ret;
523 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
525 /* Some targets support access to KVM's guest TLB. */
526 switch (cenv->mmu_model) {
527 case POWERPC_MMU_BOOKE206:
528 ret = kvm_booke206_tlb_init(cpu);
529 break;
530 default:
531 break;
534 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
535 kvmppc_hw_debug_points_init(cenv);
537 return ret;
540 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
542 CPUPPCState *env = &cpu->env;
543 CPUState *cs = CPU(cpu);
544 struct kvm_dirty_tlb dirty_tlb;
545 unsigned char *bitmap;
546 int ret;
548 if (!env->kvm_sw_tlb) {
549 return;
552 bitmap = g_malloc((env->nb_tlb + 7) / 8);
553 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
555 dirty_tlb.bitmap = (uintptr_t)bitmap;
556 dirty_tlb.num_dirty = env->nb_tlb;
558 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
559 if (ret) {
560 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
561 __func__, strerror(-ret));
564 g_free(bitmap);
567 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
569 PowerPCCPU *cpu = POWERPC_CPU(cs);
570 CPUPPCState *env = &cpu->env;
571 union {
572 uint32_t u32;
573 uint64_t u64;
574 } val;
575 struct kvm_one_reg reg = {
576 .id = id,
577 .addr = (uintptr_t) &val,
579 int ret;
581 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
582 if (ret != 0) {
583 trace_kvm_failed_spr_get(spr, strerror(errno));
584 } else {
585 switch (id & KVM_REG_SIZE_MASK) {
586 case KVM_REG_SIZE_U32:
587 env->spr[spr] = val.u32;
588 break;
590 case KVM_REG_SIZE_U64:
591 env->spr[spr] = val.u64;
592 break;
594 default:
595 /* Don't handle this size yet */
596 abort();
601 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
603 PowerPCCPU *cpu = POWERPC_CPU(cs);
604 CPUPPCState *env = &cpu->env;
605 union {
606 uint32_t u32;
607 uint64_t u64;
608 } val;
609 struct kvm_one_reg reg = {
610 .id = id,
611 .addr = (uintptr_t) &val,
613 int ret;
615 switch (id & KVM_REG_SIZE_MASK) {
616 case KVM_REG_SIZE_U32:
617 val.u32 = env->spr[spr];
618 break;
620 case KVM_REG_SIZE_U64:
621 val.u64 = env->spr[spr];
622 break;
624 default:
625 /* Don't handle this size yet */
626 abort();
629 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
630 if (ret != 0) {
631 trace_kvm_failed_spr_set(spr, strerror(errno));
635 static int kvm_put_fp(CPUState *cs)
637 PowerPCCPU *cpu = POWERPC_CPU(cs);
638 CPUPPCState *env = &cpu->env;
639 struct kvm_one_reg reg;
640 int i;
641 int ret;
643 if (env->insns_flags & PPC_FLOAT) {
644 uint64_t fpscr = env->fpscr;
645 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
647 reg.id = KVM_REG_PPC_FPSCR;
648 reg.addr = (uintptr_t)&fpscr;
649 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
650 if (ret < 0) {
651 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
652 return ret;
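/* Each VSX register is a pair of 64-bit doublewords, and doubleword 0
 * architecturally aliases the corresponding FPR; the host-endianness #ifdef
 * below orders the two halves the way KVM expects them in the buffer. */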
655 for (i = 0; i < 32; i++) {
656 uint64_t vsr[2];
658 #ifdef HOST_WORDS_BIGENDIAN
659 vsr[0] = float64_val(env->fpr[i]);
660 vsr[1] = env->vsr[i];
661 #else
662 vsr[0] = env->vsr[i];
663 vsr[1] = float64_val(env->fpr[i]);
664 #endif
665 reg.addr = (uintptr_t) &vsr;
666 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
668 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
669 if (ret < 0) {
670 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
671 i, strerror(errno));
672 return ret;
677 if (env->insns_flags & PPC_ALTIVEC) {
678 reg.id = KVM_REG_PPC_VSCR;
679 reg.addr = (uintptr_t)&env->vscr;
680 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
681 if (ret < 0) {
682 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
683 return ret;
686 for (i = 0; i < 32; i++) {
687 reg.id = KVM_REG_PPC_VR(i);
688 reg.addr = (uintptr_t)&env->avr[i];
689 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
690 if (ret < 0) {
691 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
692 return ret;
697 return 0;
700 static int kvm_get_fp(CPUState *cs)
702 PowerPCCPU *cpu = POWERPC_CPU(cs);
703 CPUPPCState *env = &cpu->env;
704 struct kvm_one_reg reg;
705 int i;
706 int ret;
708 if (env->insns_flags & PPC_FLOAT) {
709 uint64_t fpscr;
710 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
712 reg.id = KVM_REG_PPC_FPSCR;
713 reg.addr = (uintptr_t)&fpscr;
714 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
715 if (ret < 0) {
716 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
717 return ret;
718 } else {
719 env->fpscr = fpscr;
722 for (i = 0; i < 32; i++) {
723 uint64_t vsr[2];
725 reg.addr = (uintptr_t) &vsr;
726 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
728 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
729 if (ret < 0) {
730 DPRINTF("Unable to get %s%d from KVM: %s\n",
731 vsx ? "VSR" : "FPR", i, strerror(errno));
732 return ret;
733 } else {
734 #ifdef HOST_WORDS_BIGENDIAN
735 env->fpr[i] = vsr[0];
736 if (vsx) {
737 env->vsr[i] = vsr[1];
739 #else
740 env->fpr[i] = vsr[1];
741 if (vsx) {
742 env->vsr[i] = vsr[0];
744 #endif
749 if (env->insns_flags & PPC_ALTIVEC) {
750 reg.id = KVM_REG_PPC_VSCR;
751 reg.addr = (uintptr_t)&env->vscr;
752 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
753 if (ret < 0) {
754 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
755 return ret;
758 for (i = 0; i < 32; i++) {
759 reg.id = KVM_REG_PPC_VR(i);
760 reg.addr = (uintptr_t)&env->avr[i];
761 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
762 if (ret < 0) {
763 DPRINTF("Unable to get VR%d from KVM: %s\n",
764 i, strerror(errno));
765 return ret;
770 return 0;
773 #if defined(TARGET_PPC64)
774 static int kvm_get_vpa(CPUState *cs)
776 PowerPCCPU *cpu = POWERPC_CPU(cs);
777 CPUPPCState *env = &cpu->env;
778 struct kvm_one_reg reg;
779 int ret;
781 reg.id = KVM_REG_PPC_VPA_ADDR;
782 reg.addr = (uintptr_t)&env->vpa_addr;
783 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784 if (ret < 0) {
785 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
786 return ret;
789 assert((uintptr_t)&env->slb_shadow_size
790 == ((uintptr_t)&env->slb_shadow_addr + 8));
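/* KVM_REG_PPC_VPA_SLB transfers the SLB shadow address and size as a single
 * 128-bit value, so the two fields must sit next to each other in
 * CPUPPCState; the assert above guards that layout assumption. */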
791 reg.id = KVM_REG_PPC_VPA_SLB;
792 reg.addr = (uintptr_t)&env->slb_shadow_addr;
793 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
794 if (ret < 0) {
795 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
796 strerror(errno));
797 return ret;
800 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
801 reg.id = KVM_REG_PPC_VPA_DTL;
802 reg.addr = (uintptr_t)&env->dtl_addr;
803 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
804 if (ret < 0) {
805 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
806 strerror(errno));
807 return ret;
810 return 0;
813 static int kvm_put_vpa(CPUState *cs)
815 PowerPCCPU *cpu = POWERPC_CPU(cs);
816 CPUPPCState *env = &cpu->env;
817 struct kvm_one_reg reg;
818 int ret;
820 /* SLB shadow or DTL can't be registered unless a master VPA is
821 * registered. That means when restoring state, if a VPA *is*
822 * registered, we need to set that up first. If not, we need to
823 * deregister the others before deregistering the master VPA */
824 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
826 if (env->vpa_addr) {
827 reg.id = KVM_REG_PPC_VPA_ADDR;
828 reg.addr = (uintptr_t)&env->vpa_addr;
829 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
830 if (ret < 0) {
831 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
832 return ret;
836 assert((uintptr_t)&env->slb_shadow_size
837 == ((uintptr_t)&env->slb_shadow_addr + 8));
838 reg.id = KVM_REG_PPC_VPA_SLB;
839 reg.addr = (uintptr_t)&env->slb_shadow_addr;
840 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
841 if (ret < 0) {
842 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
843 return ret;
846 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
847 reg.id = KVM_REG_PPC_VPA_DTL;
848 reg.addr = (uintptr_t)&env->dtl_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
850 if (ret < 0) {
851 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
852 strerror(errno));
853 return ret;
856 if (!env->vpa_addr) {
857 reg.id = KVM_REG_PPC_VPA_ADDR;
858 reg.addr = (uintptr_t)&env->vpa_addr;
859 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
860 if (ret < 0) {
861 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
862 return ret;
866 return 0;
868 #endif /* TARGET_PPC64 */
870 int kvm_arch_put_registers(CPUState *cs, int level)
872 PowerPCCPU *cpu = POWERPC_CPU(cs);
873 CPUPPCState *env = &cpu->env;
874 struct kvm_regs regs;
875 int ret;
876 int i;
878 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
879 if (ret < 0) {
880 return ret;
883 regs.ctr = env->ctr;
884 regs.lr = env->lr;
885 regs.xer = cpu_read_xer(env);
886 regs.msr = env->msr;
887 regs.pc = env->nip;
889 regs.srr0 = env->spr[SPR_SRR0];
890 regs.srr1 = env->spr[SPR_SRR1];
892 regs.sprg0 = env->spr[SPR_SPRG0];
893 regs.sprg1 = env->spr[SPR_SPRG1];
894 regs.sprg2 = env->spr[SPR_SPRG2];
895 regs.sprg3 = env->spr[SPR_SPRG3];
896 regs.sprg4 = env->spr[SPR_SPRG4];
897 regs.sprg5 = env->spr[SPR_SPRG5];
898 regs.sprg6 = env->spr[SPR_SPRG6];
899 regs.sprg7 = env->spr[SPR_SPRG7];
901 regs.pid = env->spr[SPR_BOOKE_PID];
903 for (i = 0; i < 32; i++)
904 regs.gpr[i] = env->gpr[i];
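/* QEMU keeps the condition register as eight separate 4-bit crf fields,
 * crf[0] being the most significant; pack them back into the single 32-bit
 * CR image used by KVM's register struct. */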
906 regs.cr = 0;
907 for (i = 0; i < 8; i++) {
908 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
911 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
912 if (ret < 0)
913 return ret;
915 kvm_put_fp(cs);
917 if (env->tlb_dirty) {
918 kvm_sw_tlb_put(cpu);
919 env->tlb_dirty = false;
922 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
923 struct kvm_sregs sregs;
925 sregs.pvr = env->spr[SPR_PVR];
927 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
929 /* Sync SLB */
930 #ifdef TARGET_PPC64
931 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
932 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
933 if (env->slb[i].esid & SLB_ESID_V) {
934 sregs.u.s.ppc64.slb[i].slbe |= i;
936 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
938 #endif
940 /* Sync SRs */
941 for (i = 0; i < 16; i++) {
942 sregs.u.s.ppc32.sr[i] = env->sr[i];
945 /* Sync BATs */
946 for (i = 0; i < 8; i++) {
947 /* Beware. We have to swap upper and lower bits here */
948 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
949 | env->DBAT[1][i];
950 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
951 | env->IBAT[1][i];
954 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
955 if (ret) {
956 return ret;
960 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
961 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
964 if (cap_one_reg) {
965 int i;
967 /* We deliberately ignore errors here: for kernels which have
968 * the ONE_REG calls but don't support the specific
969 * registers, there's a reasonable chance things will still
970 * work, at least until we try to migrate. */
971 for (i = 0; i < 1024; i++) {
972 uint64_t id = env->spr_cb[i].one_reg_id;
974 if (id != 0) {
975 kvm_put_one_spr(cs, id, i);
979 #ifdef TARGET_PPC64
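/* msr_ts is the MSR Transaction State field; while a transaction is active
 * or suspended we also have to transfer the transactional-memory
 * checkpointed registers. */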
980 if (msr_ts) {
981 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
982 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
984 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
988 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
991 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
992 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
993 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
994 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
995 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
996 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
999 if (cap_papr) {
1000 if (kvm_put_vpa(cs) < 0) {
1001 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1005 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1006 #endif /* TARGET_PPC64 */
1009 return ret;
1012 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1014 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1017 int kvm_arch_get_registers(CPUState *cs)
1019 PowerPCCPU *cpu = POWERPC_CPU(cs);
1020 CPUPPCState *env = &cpu->env;
1021 struct kvm_regs regs;
1022 struct kvm_sregs sregs;
1023 uint32_t cr;
1024 int i, ret;
1026 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1027 if (ret < 0)
1028 return ret;
1030 cr = regs.cr;
1031 for (i = 7; i >= 0; i--) {
1032 env->crf[i] = cr & 15;
1033 cr >>= 4;
1036 env->ctr = regs.ctr;
1037 env->lr = regs.lr;
1038 cpu_write_xer(env, regs.xer);
1039 env->msr = regs.msr;
1040 env->nip = regs.pc;
1042 env->spr[SPR_SRR0] = regs.srr0;
1043 env->spr[SPR_SRR1] = regs.srr1;
1045 env->spr[SPR_SPRG0] = regs.sprg0;
1046 env->spr[SPR_SPRG1] = regs.sprg1;
1047 env->spr[SPR_SPRG2] = regs.sprg2;
1048 env->spr[SPR_SPRG3] = regs.sprg3;
1049 env->spr[SPR_SPRG4] = regs.sprg4;
1050 env->spr[SPR_SPRG5] = regs.sprg5;
1051 env->spr[SPR_SPRG6] = regs.sprg6;
1052 env->spr[SPR_SPRG7] = regs.sprg7;
1054 env->spr[SPR_BOOKE_PID] = regs.pid;
1056 for (i = 0; i < 32; i++)
1057 env->gpr[i] = regs.gpr[i];
1059 kvm_get_fp(cs);
1061 if (cap_booke_sregs) {
1062 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1063 if (ret < 0) {
1064 return ret;
1067 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1068 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1069 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1070 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1071 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1072 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1073 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1074 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1075 env->spr[SPR_DECR] = sregs.u.e.dec;
1076 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1077 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1078 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1081 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1082 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1083 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1084 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1085 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1086 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1089 if (sregs.u.e.features & KVM_SREGS_E_64) {
1090 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1093 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1094 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1097 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1098 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1099 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1100 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1101 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1102 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1103 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1104 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1105 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1106 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1107 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1108 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1109 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1110 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1111 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1112 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1113 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1114 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1115 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1116 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1117 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1118 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1119 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1120 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1121 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1122 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1123 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1124 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1125 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1126 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1127 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1128 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1129 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1131 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1132 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1133 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1134 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1135 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1136 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1137 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1140 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1141 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1142 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1145 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1146 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1147 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1148 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1149 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1153 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1154 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1155 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1156 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1157 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1158 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1159 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1160 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1161 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1162 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1163 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1166 if (sregs.u.e.features & KVM_SREGS_EXP) {
1167 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1170 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1171 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1172 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1175 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1176 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1177 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1178 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1180 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1181 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1182 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1187 if (cap_segstate) {
1188 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1189 if (ret < 0) {
1190 return ret;
1193 if (!env->external_htab) {
1194 ppc_store_sdr1(env, sregs.u.s.sdr1);
1197 /* Sync SLB */
1198 #ifdef TARGET_PPC64
1200 * The packed SLB array we get from KVM_GET_SREGS only contains
1201 * information about valid entries. So we flush our internal
1202 * copy to get rid of stale ones, then put all valid SLB entries
1203 * back in.
1205 memset(env->slb, 0, sizeof(env->slb));
1206 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1207 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1208 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1210 * Only restore valid entries
1212 if (rb & SLB_ESID_V) {
1213 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1216 #endif
1218 /* Sync SRs */
1219 for (i = 0; i < 16; i++) {
1220 env->sr[i] = sregs.u.s.ppc32.sr[i];
1223 /* Sync BATs */
1224 for (i = 0; i < 8; i++) {
1225 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1226 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1227 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1228 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1232 if (cap_hior) {
1233 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1236 if (cap_one_reg) {
1237 int i;
1239 /* We deliberately ignore errors here: for kernels which have
1240 * the ONE_REG calls but don't support the specific
1241 * registers, there's a reasonable chance things will still
1242 * work, at least until we try to migrate. */
1243 for (i = 0; i < 1024; i++) {
1244 uint64_t id = env->spr_cb[i].one_reg_id;
1246 if (id != 0) {
1247 kvm_get_one_spr(cs, id, i);
1251 #ifdef TARGET_PPC64
1252 if (msr_ts) {
1253 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1254 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1256 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1257 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1259 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1260 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1261 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1262 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1263 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1264 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1265 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1266 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1267 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1268 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1271 if (cap_papr) {
1272 if (kvm_get_vpa(cs) < 0) {
1273 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1277 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1278 #endif
1281 return 0;
1284 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1286 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1288 if (irq != PPC_INTERRUPT_EXT) {
1289 return 0;
1292 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1293 return 0;
1296 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1298 return 0;
1301 #if defined(TARGET_PPCEMB)
1302 #define PPC_INPUT_INT PPC40x_INPUT_INT
1303 #elif defined(TARGET_PPC64)
1304 #define PPC_INPUT_INT PPC970_INPUT_INT
1305 #else
1306 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1307 #endif
1309 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1311 PowerPCCPU *cpu = POWERPC_CPU(cs);
1312 CPUPPCState *env = &cpu->env;
1313 int r;
1314 unsigned irq;
1316 qemu_mutex_lock_iothread();
1318 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1319 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1320 if (!cap_interrupt_level &&
1321 run->ready_for_interrupt_injection &&
1322 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1323 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1325 /* For now KVM disregards the 'irq' argument. However, in the
1326 * future KVM could cache it in-kernel to avoid a heavyweight exit
1327 * when reading the UIC.
1329 irq = KVM_INTERRUPT_SET;
1331 DPRINTF("injected interrupt %d\n", irq);
1332 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1333 if (r < 0) {
1334 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1337 /* Always wake up soon in case the interrupt was level based */
1338 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1339 (get_ticks_per_sec() / 50));
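/* get_ticks_per_sec() / 50 is the 20 ms re-poll interval described in the
 * level-triggered interrupt workaround comment near the top of this file. */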
1342 /* We don't know if there are more interrupts pending after this. However,
1343 * the guest will return to userspace in the course of handling this one
1344 * anyways, so we will get a chance to deliver the rest. */
1346 qemu_mutex_unlock_iothread();
1349 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1351 return MEMTXATTRS_UNSPECIFIED;
1354 int kvm_arch_process_async_events(CPUState *cs)
1356 return cs->halted;
1359 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1361 CPUState *cs = CPU(cpu);
1362 CPUPPCState *env = &cpu->env;
1364 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1365 cs->halted = 1;
1366 cs->exception_index = EXCP_HLT;
1369 return 0;
1372 /* map dcr access to existing qemu dcr emulation */
1373 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1375 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1376 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1378 return 0;
1381 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1383 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1384 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1386 return 0;
1389 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1391 /* Mixed endian case is not handled */
1392 uint32_t sc = debug_inst_opcode;
1394 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1395 sizeof(sc), 0) ||
1396 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1397 return -EINVAL;
1400 return 0;
1403 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1405 uint32_t sc;
1407 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1408 sc != debug_inst_opcode ||
1409 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1410 sizeof(sc), 1)) {
1411 return -EINVAL;
1414 return 0;
1417 static int find_hw_breakpoint(target_ulong addr, int type)
1419 int n;
1421 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1422 <= ARRAY_SIZE(hw_debug_points));
1424 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1425 if (hw_debug_points[n].addr == addr &&
1426 hw_debug_points[n].type == type) {
1427 return n;
1431 return -1;
1434 static int find_hw_watchpoint(target_ulong addr, int *flag)
1436 int n;
1438 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1439 if (n >= 0) {
1440 *flag = BP_MEM_ACCESS;
1441 return n;
1444 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1445 if (n >= 0) {
1446 *flag = BP_MEM_WRITE;
1447 return n;
1450 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1451 if (n >= 0) {
1452 *flag = BP_MEM_READ;
1453 return n;
1456 return -1;
1459 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1460 target_ulong len, int type)
1462 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1463 return -ENOBUFS;
1466 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1467 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1469 switch (type) {
1470 case GDB_BREAKPOINT_HW:
1471 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1472 return -ENOBUFS;
1475 if (find_hw_breakpoint(addr, type) >= 0) {
1476 return -EEXIST;
1479 nb_hw_breakpoint++;
1480 break;
1482 case GDB_WATCHPOINT_WRITE:
1483 case GDB_WATCHPOINT_READ:
1484 case GDB_WATCHPOINT_ACCESS:
1485 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1486 return -ENOBUFS;
1489 if (find_hw_breakpoint(addr, type) >= 0) {
1490 return -EEXIST;
1493 nb_hw_watchpoint++;
1494 break;
1496 default:
1497 return -ENOSYS;
1500 return 0;
1503 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1504 target_ulong len, int type)
1506 int n;
1508 n = find_hw_breakpoint(addr, type);
1509 if (n < 0) {
1510 return -ENOENT;
1513 switch (type) {
1514 case GDB_BREAKPOINT_HW:
1515 nb_hw_breakpoint--;
1516 break;
1518 case GDB_WATCHPOINT_WRITE:
1519 case GDB_WATCHPOINT_READ:
1520 case GDB_WATCHPOINT_ACCESS:
1521 nb_hw_watchpoint--;
1522 break;
1524 default:
1525 return -ENOSYS;
1527 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1529 return 0;
1532 void kvm_arch_remove_all_hw_breakpoints(void)
1534 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1537 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1539 int n;
1541 /* Software Breakpoint updates */
1542 if (kvm_sw_breakpoints_active(cs)) {
1543 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1546 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1547 <= ARRAY_SIZE(hw_debug_points));
1548 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1550 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1551 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1552 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1553 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1554 switch (hw_debug_points[n].type) {
1555 case GDB_BREAKPOINT_HW:
1556 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1557 break;
1558 case GDB_WATCHPOINT_WRITE:
1559 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1560 break;
1561 case GDB_WATCHPOINT_READ:
1562 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1563 break;
1564 case GDB_WATCHPOINT_ACCESS:
1565 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1566 KVMPPC_DEBUG_WATCH_READ;
1567 break;
1568 default:
1569 cpu_abort(cs, "Unsupported breakpoint type\n");
1571 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1576 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1578 CPUState *cs = CPU(cpu);
1579 CPUPPCState *env = &cpu->env;
1580 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1581 int handle = 0;
1582 int n;
1583 int flag = 0;
1585 if (cs->singlestep_enabled) {
1586 handle = 1;
1587 } else if (arch_info->status) {
1588 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1589 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1590 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1591 if (n >= 0) {
1592 handle = 1;
1594 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1595 KVMPPC_DEBUG_WATCH_WRITE)) {
1596 n = find_hw_watchpoint(arch_info->address, &flag);
1597 if (n >= 0) {
1598 handle = 1;
1599 cs->watchpoint_hit = &hw_watchpoint;
1600 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1601 hw_watchpoint.flags = flag;
1605 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1606 handle = 1;
1607 } else {
1608 /* QEMU is not able to handle this debug exception, so inject
1609 * a program exception into the guest;
1610 * yes, a program exception, NOT a debug exception !!
1611 * When QEMU is using the debug resources then the debug exception
1612 * must always be set. To achieve this we set MSR_DE and also set
1613 * MSRP_DEP so the guest cannot change MSR_DE.
1614 * When emulating debug resources for the guest we want the guest
1615 * to control MSR_DE (enable/disable the debug interrupt on demand).
1616 * Supporting both configurations is NOT possible.
1617 * So the result is that we cannot share debug resources
1618 * between QEMU and the guest on the BookE architecture.
1619 * In the current design QEMU gets priority over the guest:
1620 * this means that if QEMU is using debug resources then the guest
1621 * cannot use them.
1622 * For software breakpoints QEMU uses a privileged instruction,
1623 * so there is no way we ended up here because the guest set a
1624 * debug exception; the only possibility is that the guest executed
1625 * a privileged / illegal instruction and that's why we are
1626 * injecting a program interrupt.
1629 cpu_synchronize_state(cs);
1630 /* env->nip is PC, so increment this by 4 to use
1631 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1633 env->nip += 4;
1634 cs->exception_index = POWERPC_EXCP_PROGRAM;
1635 env->error_code = POWERPC_EXCP_INVAL;
1636 ppc_cpu_do_interrupt(cs);
1639 return handle;
1642 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1644 PowerPCCPU *cpu = POWERPC_CPU(cs);
1645 CPUPPCState *env = &cpu->env;
1646 int ret;
1648 qemu_mutex_lock_iothread();
1650 switch (run->exit_reason) {
1651 case KVM_EXIT_DCR:
1652 if (run->dcr.is_write) {
1653 DPRINTF("handle dcr write\n");
1654 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1655 } else {
1656 DPRINTF("handle dcr read\n");
1657 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1659 break;
1660 case KVM_EXIT_HLT:
1661 DPRINTF("handle halt\n");
1662 ret = kvmppc_handle_halt(cpu);
1663 break;
1664 #if defined(TARGET_PPC64)
1665 case KVM_EXIT_PAPR_HCALL:
1666 DPRINTF("handle PAPR hypercall\n");
1667 run->papr_hcall.ret = spapr_hypercall(cpu,
1668 run->papr_hcall.nr,
1669 run->papr_hcall.args);
1670 ret = 0;
1671 break;
1672 #endif
1673 case KVM_EXIT_EPR:
1674 DPRINTF("handle epr\n");
1675 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1676 ret = 0;
1677 break;
1678 case KVM_EXIT_WATCHDOG:
1679 DPRINTF("handle watchdog expiry\n");
1680 watchdog_perform_action();
1681 ret = 0;
1682 break;
1684 case KVM_EXIT_DEBUG:
1685 DPRINTF("handle debug exception\n");
1686 if (kvm_handle_debug(cpu, run)) {
1687 ret = EXCP_DEBUG;
1688 break;
1690 /* re-enter, this exception was guest-internal */
1691 ret = 0;
1692 break;
1694 default:
1695 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1696 ret = -1;
1697 break;
1700 qemu_mutex_unlock_iothread();
1701 return ret;
1704 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1706 CPUState *cs = CPU(cpu);
1707 uint32_t bits = tsr_bits;
1708 struct kvm_one_reg reg = {
1709 .id = KVM_REG_PPC_OR_TSR,
1710 .addr = (uintptr_t) &bits,
1713 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1716 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1719 CPUState *cs = CPU(cpu);
1720 uint32_t bits = tsr_bits;
1721 struct kvm_one_reg reg = {
1722 .id = KVM_REG_PPC_CLEAR_TSR,
1723 .addr = (uintptr_t) &bits,
1726 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1729 int kvmppc_set_tcr(PowerPCCPU *cpu)
1731 CPUState *cs = CPU(cpu);
1732 CPUPPCState *env = &cpu->env;
1733 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1735 struct kvm_one_reg reg = {
1736 .id = KVM_REG_PPC_TCR,
1737 .addr = (uintptr_t) &tcr,
1740 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1743 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1745 CPUState *cs = CPU(cpu);
1746 int ret;
1748 if (!kvm_enabled()) {
1749 return -1;
1752 if (!cap_ppc_watchdog) {
1753 printf("warning: KVM does not support watchdog\n");
1754 return -1;
1757 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1758 if (ret < 0) {
1759 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1760 __func__, strerror(-ret));
1761 return ret;
1764 return ret;
1767 static int read_cpuinfo(const char *field, char *value, int len)
1769 FILE *f;
1770 int ret = -1;
1771 int field_len = strlen(field);
1772 char line[512];
1774 f = fopen("/proc/cpuinfo", "r");
1775 if (!f) {
1776 return -1;
1779 do {
1780 if (!fgets(line, sizeof(line), f)) {
1781 break;
1783 if (!strncmp(line, field, field_len)) {
1784 pstrcpy(value, len, line);
1785 ret = 0;
1786 break;
1788 } while(*line);
1790 fclose(f);
1792 return ret;
1795 uint32_t kvmppc_get_tbfreq(void)
1797 char line[512];
1798 char *ns;
1799 uint32_t retval = get_ticks_per_sec();
1801 if (read_cpuinfo("timebase", line, sizeof(line))) {
1802 return retval;
1805 if (!(ns = strchr(line, ':'))) {
1806 return retval;
1809 ns++;
1811 return atoi(ns);
1814 bool kvmppc_get_host_serial(char **value)
1816 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1817 NULL);
1820 bool kvmppc_get_host_model(char **value)
1822 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1825 /* Try to find a device tree node for a CPU with clock-frequency property */
1826 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1828 struct dirent *dirp;
1829 DIR *dp;
1831 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1832 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1833 return -1;
1836 buf[0] = '\0';
1837 while ((dirp = readdir(dp)) != NULL) {
1838 FILE *f;
1839 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1840 dirp->d_name);
1841 f = fopen(buf, "r");
1842 if (f) {
1843 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1844 fclose(f);
1845 break;
1847 buf[0] = '\0';
1849 closedir(dp);
1850 if (buf[0] == '\0') {
1851 printf("Unknown host!\n");
1852 return -1;
1855 return 0;
1858 static uint64_t kvmppc_read_int_dt(const char *filename)
1860 union {
1861 uint32_t v32;
1862 uint64_t v64;
1863 } u;
1864 FILE *f;
1865 int len;
1867 f = fopen(filename, "rb");
1868 if (!f) {
1869 return -1;
1872 len = fread(&u, 1, sizeof(u), f);
1873 fclose(f);
1874 switch (len) {
1875 case 4:
1876 /* property is a 32-bit quantity */
1877 return be32_to_cpu(u.v32);
1878 case 8:
1879 return be64_to_cpu(u.v64);
1882 return 0;
1885 /* Read a CPU node property from the host device tree that's a single
1886 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1887 * (can't find or open the property, or doesn't understand the
1888 * format) */
1889 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1891 char buf[PATH_MAX], *tmp;
1892 uint64_t val;
1894 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1895 return -1;
1898 tmp = g_strdup_printf("%s/%s", buf, propname);
1899 val = kvmppc_read_int_dt(tmp);
1900 g_free(tmp);
1902 return val;
1905 uint64_t kvmppc_get_clockfreq(void)
1907 return kvmppc_read_int_cpu_dt("clock-frequency");
1910 uint32_t kvmppc_get_vmx(void)
1912 return kvmppc_read_int_cpu_dt("ibm,vmx");
1915 uint32_t kvmppc_get_dfp(void)
1917 return kvmppc_read_int_cpu_dt("ibm,dfp");
1920 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1922 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1923 CPUState *cs = CPU(cpu);
1925 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1926 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1927 return 0;
1930 return 1;
1933 int kvmppc_get_hasidle(CPUPPCState *env)
1935 struct kvm_ppc_pvinfo pvinfo;
1937 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1938 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1939 return 1;
1942 return 0;
1945 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1947 uint32_t *hc = (uint32_t*)buf;
1948 struct kvm_ppc_pvinfo pvinfo;
1950 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1951 memcpy(buf, pvinfo.hcall, buf_len);
1952 return 0;
1956 * Fallback to always fail hypercalls regardless of endianness:
1958 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1959 * li r3, -1
1960 * b .+8 (becomes nop in wrong endian)
1961 * bswap32(li r3, -1)
1964 hc[0] = cpu_to_be32(0x08000048);
1965 hc[1] = cpu_to_be32(0x3860ffff);
1966 hc[2] = cpu_to_be32(0x48000008);
1967 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1969 return 0;
1972 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1974 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1977 void kvmppc_enable_logical_ci_hcalls(void)
1980 * FIXME: it would be nice if we could detect the cases where
1981 * we're using a device which requires the in kernel
1982 * implementation of these hcalls, but the kernel lacks them and
1983 * produce a warning.
1985 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1986 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1989 void kvmppc_enable_set_mode_hcall(void)
1991 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1994 void kvmppc_set_papr(PowerPCCPU *cpu)
1996 CPUState *cs = CPU(cpu);
1997 int ret;
1999 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2000 if (ret) {
2001 error_report("This vCPU type or KVM version does not support PAPR");
2002 exit(1);
2005 /* Update the capability flag so we sync the right information
2006 * with kvm */
2007 cap_papr = 1;
2010 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2012 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2015 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2017 CPUState *cs = CPU(cpu);
2018 int ret;
2020 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2021 if (ret && mpic_proxy) {
2022 error_report("This KVM version does not support EPR");
2023 exit(1);
2027 int kvmppc_smt_threads(void)
2029 return cap_ppc_smt ? cap_ppc_smt : 1;
2032 #ifdef TARGET_PPC64
2033 off_t kvmppc_alloc_rma(void **rma)
2035 off_t size;
2036 int fd;
2037 struct kvm_allocate_rma ret;
2039 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2040 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2041 * not necessary on this hardware
2042 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2044 * FIXME: We should allow the user to force contiguous RMA
2045 * allocation in the cap_ppc_rma==1 case.
2047 if (cap_ppc_rma < 2) {
2048 return 0;
2051 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2052 if (fd < 0) {
2053 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2054 strerror(errno));
2055 return -1;
2058 size = MIN(ret.rma_size, 256ul << 20);
2060 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2061 if (*rma == MAP_FAILED) {
2062 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2063 return -1;
2066 return size;
2069 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2071 struct kvm_ppc_smmu_info info;
2072 long rampagesize, best_page_shift;
2073 int i;
2075 if (cap_ppc_rma >= 2) {
2076 return current_size;
2079 /* Find the largest hardware supported page size that's less than
2080 * or equal to the (logical) backing page size of guest RAM */
2081 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2082 rampagesize = getrampagesize();
2083 best_page_shift = 0;
2085 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2086 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2088 if (!sps->page_shift) {
2089 continue;
2092 if ((sps->page_shift > best_page_shift)
2093 && ((1UL << sps->page_shift) <= rampagesize)) {
2094 best_page_shift = sps->page_shift;
2098 return MIN(current_size,
2099 1ULL << (best_page_shift + hash_shift - 7));
2101 #endif
2103 bool kvmppc_spapr_use_multitce(void)
2105 return cap_spapr_multitce;
2108 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2109 bool need_vfio)
2111 struct kvm_create_spapr_tce args = {
2112 .liobn = liobn,
2113 .window_size = window_size,
2115 long len;
2116 int fd;
2117 void *table;
2119 /* Must set fd to -1 so we don't try to munmap when called for
2120 * destroying the table, which the upper layers -will- do
2122 *pfd = -1;
2123 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2124 return NULL;
2127 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2128 if (fd < 0) {
2129 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2130 liobn);
2131 return NULL;
2134 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2135 /* FIXME: round this up to page size */
2137 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2138 if (table == MAP_FAILED) {
2139 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2140 liobn);
2141 close(fd);
2142 return NULL;
2145 *pfd = fd;
2146 return table;
2149 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2151 long len;
2153 if (fd < 0) {
2154 return -1;
2157 len = nb_table * sizeof(uint64_t);
2158 if ((munmap(table, len) < 0) ||
2159 (close(fd) < 0)) {
2160 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2161 strerror(errno));
2162 /* Leak the table */
2165 return 0;
2168 int kvmppc_reset_htab(int shift_hint)
2170 uint32_t shift = shift_hint;
2172 if (!kvm_enabled()) {
2173 /* Full emulation, tell caller to allocate htab itself */
2174 return 0;
2176 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2177 int ret;
2178 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2179 if (ret == -ENOTTY) {
2180 /* At least some versions of PR KVM advertise the
2181 * capability, but don't implement the ioctl(). Oops.
2182 * Return 0 so that we allocate the htab in qemu, as is
2183 * correct for PR. */
2184 return 0;
2185 } else if (ret < 0) {
2186 return ret;
2187 }
2188 return shift;
2189 }
2191 /* We have a kernel that predates the htab reset calls. For PR
2192 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2193 * this era, it has allocated a 16MB fixed size hash table
2194 * already. Kernels of this era have the GET_PVINFO capability
2195 * only on PR, so we use this hack to determine the right
2196 * answer */
2197 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2198 /* PR - tell caller to allocate htab */
2199 return 0;
2200 } else {
2201 /* HV - assume 16MB kernel allocated htab */
2202 return 24;
2203 }
2204 }
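/*
 * Return convention: 0 tells the caller to allocate the hash page table in
 * QEMU itself (full emulation or PR KVM), a positive value is the log2 of the
 * HPT size the kernel has set up (24 above meaning a fixed 16 MiB table), and
 * a negative value is an error from the ioctl.
 */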
2206 static inline uint32_t mfpvr(void)
2208 uint32_t pvr;
2210 asm ("mfpvr %0"
2211 : "=r"(pvr));
2212 return pvr;
2215 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2217 if (on) {
2218 *word |= flags;
2219 } else {
2220 *word &= ~flags;
2224 static void kvmppc_host_cpu_initfn(Object *obj)
2226 assert(kvm_enabled());
2229 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2231 DeviceClass *dc = DEVICE_CLASS(oc);
2232 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2233 uint32_t vmx = kvmppc_get_vmx();
2234 uint32_t dfp = kvmppc_get_dfp();
2235 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2236 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2238 /* Now fix up the class with information we can query from the host */
2239 pcc->pvr = mfpvr();
2241 if (vmx != -1) {
2242 /* Only override when we know what the host supports */
2243 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2244 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2245 }
2246 if (dfp != -1) {
2247 /* Only override when we know what the host supports */
2248 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2249 }
2251 if (dcache_size != -1) {
2252 pcc->l1_dcache_size = dcache_size;
2253 }
2255 if (icache_size != -1) {
2256 pcc->l1_icache_size = icache_size;
2257 }
2259 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2260 dc->cannot_destroy_with_object_finalize_yet = true;
2263 bool kvmppc_has_cap_epr(void)
2265 return cap_epr;
2268 bool kvmppc_has_cap_htab_fd(void)
2270 return cap_htab_fd;
2273 bool kvmppc_has_cap_fixup_hcalls(void)
2275 return cap_fixup_hcalls;
2278 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2280 ObjectClass *oc = OBJECT_CLASS(pcc);
2282 while (oc && !object_class_is_abstract(oc)) {
2283 oc = object_class_get_parent(oc);
2285 assert(oc);
2287 return POWERPC_CPU_CLASS(oc);
2290 static int kvm_ppc_register_host_cpu_type(void)
2292 TypeInfo type_info = {
2293 .name = TYPE_HOST_POWERPC_CPU,
2294 .instance_init = kvmppc_host_cpu_initfn,
2295 .class_init = kvmppc_host_cpu_class_init,
2296 };
2297 uint32_t host_pvr = mfpvr();
2298 PowerPCCPUClass *pvr_pcc;
2299 DeviceClass *dc;
2301 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2302 if (pvr_pcc == NULL) {
2303 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2305 if (pvr_pcc == NULL) {
2306 return -1;
2308 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2309 type_register(&type_info);
2311 /* Register generic family CPU class for a family */
2312 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2313 dc = DEVICE_CLASS(pvr_pcc);
2314 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2315 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2316 type_register(&type_info);
2318 return 0;
2319 }
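/*
 * Illustrative effect, with the concrete names assumed rather than taken from
 * this file: on a POWER8 host this registers TYPE_HOST_POWERPC_CPU as a child
 * of the exact-PVR class, then a family alias built from dc->desc, e.g.
 * "POWER8-" TYPE_POWERPC_CPU, so "-cpu host" and the plain family name both
 * resolve to host-matching classes.
 */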
2321 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2323 struct kvm_rtas_token_args args = {
2324 .token = token,
2325 };
2327 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2328 return -ENOENT;
2331 strncpy(args.name, function, sizeof(args.name));
2333 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2334 }
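/*
 * A usage sketch, assumed rather than taken from this file: the sPAPR
 * interrupt code can ask KVM to handle an RTAS call in the kernel, e.g.
 *
 *     kvmppc_define_rtas_kernel_token(token, "ibm,set-xive");
 *
 * -ENOENT means the host kernel lacks KVM_CAP_PPC_RTAS and the call stays
 * emulated in QEMU.
 */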
2336 int kvmppc_get_htab_fd(bool write)
2338 struct kvm_get_htab_fd s = {
2339 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2340 .start_index = 0,
2341 };
2343 if (!cap_htab_fd) {
2344 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2345 return -1;
2348 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2351 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2353 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2354 uint8_t buf[bufsize];
2355 ssize_t rc;
2357 do {
2358 rc = read(fd, buf, bufsize);
2359 if (rc < 0) {
2360 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2361 strerror(errno));
2362 return rc;
2363 } else if (rc) {
2364 uint8_t *buffer = buf;
2365 ssize_t n = rc;
2366 while (n) {
2367 struct kvm_get_htab_header *head =
2368 (struct kvm_get_htab_header *) buffer;
2369 size_t chunksize = sizeof(*head) +
2370 HASH_PTE_SIZE_64 * head->n_valid;
2372 qemu_put_be32(f, head->index);
2373 qemu_put_be16(f, head->n_valid);
2374 qemu_put_be16(f, head->n_invalid);
2375 qemu_put_buffer(f, (void *)(head + 1),
2376 HASH_PTE_SIZE_64 * head->n_valid);
2378 buffer += chunksize;
2379 n -= chunksize;
2380 }
2381 }
2382 } while ((rc != 0)
2383 && ((max_ns < 0)
2384 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2386 return (rc == 0) ? 1 : 0;
2387 }
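/*
 * Stream format note: each chunk read from the HTAB fd is forwarded to the
 * migration stream as a be32 index, be16 n_valid, be16 n_invalid header
 * followed by n_valid * HASH_PTE_SIZE_64 bytes of HPTEs.  The return value is
 * 1 once the fd reports EOF (whole table sent) and 0 when the max_ns budget
 * expired first, telling the caller to come back for another pass.
 */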
2389 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2390 uint16_t n_valid, uint16_t n_invalid)
2392 struct kvm_get_htab_header *buf;
2393 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2394 ssize_t rc;
2396 buf = alloca(chunksize);
2397 buf->index = index;
2398 buf->n_valid = n_valid;
2399 buf->n_invalid = n_invalid;
2401 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2403 rc = write(fd, buf, chunksize);
2404 if (rc < 0) {
2405 fprintf(stderr, "Error writing KVM hash table: %s\n",
2406 strerror(errno));
2407 return rc;
2409 if (rc != chunksize) {
2410 /* We should never get a short write on a single chunk */
2411 fprintf(stderr, "Short write, restoring KVM hash table\n");
2412 return -1;
2414 return 0;
2417 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2419 return true;
2422 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2424 return 1;
2427 int kvm_arch_on_sigbus(int code, void *addr)
2429 return 1;
2432 void kvm_arch_init_irq_routing(KVMState *s)
2433 {
2434 }
2436 struct kvm_get_htab_buf {
2437 struct kvm_get_htab_header header;
2438 /*
2439 * We require one extra byte for read
2440 */
2441 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2442 };
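/*
 * Layout note: HPTES_PER_GROUP * 2 target_ulongs hold the eight HPTEs of one
 * PTEG (two 64-bit doublewords each); the extra element is the spare room the
 * comment above asks for when reading from the HTAB fd.
 */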
2444 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2446 int htab_fd;
2447 struct kvm_get_htab_fd ghf;
2448 struct kvm_get_htab_buf *hpte_buf;
2450 ghf.flags = 0;
2451 ghf.start_index = pte_index;
2452 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2453 if (htab_fd < 0) {
2454 goto error_out;
2457 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2458 /*
2459 * Read the hpte group
2460 */
2461 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2462 goto out_close;
2463 }
2465 close(htab_fd);
2466 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2468 out_close:
2469 g_free(hpte_buf);
2470 close(htab_fd);
2471 error_out:
2472 return 0;
2473 }
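/*
 * The "token" returned on success is really the address of the hpte[] array
 * inside the g_malloc0'd kvm_get_htab_buf; kvmppc_hash64_free_pteg() below
 * recovers the enclosing buffer with container_of() and frees it, and a token
 * of 0 signals failure to the caller.
 */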
2475 void kvmppc_hash64_free_pteg(uint64_t token)
2477 struct kvm_get_htab_buf *htab_buf;
2479 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2480 hpte);
2481 g_free(htab_buf);
2482 return;
2485 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2486 target_ulong pte0, target_ulong pte1)
2488 int htab_fd;
2489 struct kvm_get_htab_fd ghf;
2490 struct kvm_get_htab_buf hpte_buf;
2492 ghf.flags = 0;
2493 ghf.start_index = 0; /* Ignored */
2494 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2495 if (htab_fd < 0) {
2496 goto error_out;
2499 hpte_buf.header.n_valid = 1;
2500 hpte_buf.header.n_invalid = 0;
2501 hpte_buf.header.index = pte_index;
2502 hpte_buf.hpte[0] = pte0;
2503 hpte_buf.hpte[1] = pte1;
2504 /*
2505 * Write the hpte entry.
2506 * CAUTION: write() has the warn_unused_result attribute. Hence we
2507 * need to check the return value, even though we do nothing.
2508 */
2509 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2510 goto out_close;
2511 }
2513 out_close:
2514 close(htab_fd);
2515 return;
2517 error_out:
2518 return;
2521 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2522 uint64_t address, uint32_t data, PCIDevice *dev)
2524 return 0;
2527 int kvm_arch_msi_data_to_gsi(uint32_t data)
2529 return data & 0xffff;
2532 int kvmppc_enable_hwrng(void)
2534 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2535 return -1;
2538 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2539 }
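/*
 * When the host advertises KVM_CAP_PPC_HWRNG this routes the H_RANDOM
 * hypercall to the kernel's hardware RNG backend; on -1 the caller is
 * expected to fall back to a userspace H_RANDOM implementation (an assumption
 * about the caller, not stated in this file).
 */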