iscsi: Switch .bdrv_co_discard() to byte-based
[qemu/kevin.git] / target-ppc / kvm.c
blob7a8f5559d986556afbe29be4d5a20d9f5c1de99f
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/ppc.h"
40 #include "sysemu/watchdog.h"
41 #include "trace.h"
42 #include "exec/gdbstub.h"
43 #include "exec/memattrs.h"
44 #include "sysemu/hostmem.h"
45 #include "qemu/cutils.h"
46 #if defined(TARGET_PPC64)
47 #include "hw/ppc/spapr_cpu_core.h"
48 #endif
/* Uncomment to make DPRINTF() print to stderr; otherwise it expands to nothing. */
//#define DEBUG_KVM

#ifndef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
60 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
62 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
63 KVM_CAP_LAST_INFO
66 static int cap_interrupt_unset = false;
67 static int cap_interrupt_level = false;
68 static int cap_segstate;
69 static int cap_booke_sregs;
70 static int cap_ppc_smt;
71 static int cap_ppc_rma;
72 static int cap_spapr_tce;
73 static int cap_spapr_multitce;
74 static int cap_spapr_vfio;
75 static int cap_hior;
76 static int cap_one_reg;
77 static int cap_epr;
78 static int cap_ppc_watchdog;
79 static int cap_papr;
80 static int cap_htab_fd;
81 static int cap_fixup_hcalls;
83 static uint32_t debug_inst_opcode;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
94 static QEMUTimer *idle_timer;
96 static void kvm_kick_cpu(void *opaque)
98 PowerPCCPU *cpu = opaque;
100 qemu_cpu_kick(CPU(cpu));
103 static int kvm_ppc_register_host_cpu_type(void);
105 int kvm_arch_init(MachineState *ms, KVMState *s)
107 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
108 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
109 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
110 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
111 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
112 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
113 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
114 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
115 cap_spapr_vfio = false;
116 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
117 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
118 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
119 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
120 /* Note: we don't set cap_papr here, because this capability is
121 * only activated after this by kvmppc_set_papr() */
122 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
123 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
125 if (!cap_interrupt_level) {
126 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
127 "VM to stall at times!\n");
130 kvm_ppc_register_host_cpu_type();
132 return 0;
135 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
137 CPUPPCState *cenv = &cpu->env;
138 CPUState *cs = CPU(cpu);
139 struct kvm_sregs sregs;
140 int ret;
142 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
143 /* What we're really trying to say is "if we're on BookE, we use
144 the native PVR for now". This is the only sane way to check
145 it though, so we potentially confuse users that they can run
146 BookE guests on BookS. Let's hope nobody dares enough :) */
147 return 0;
148 } else {
149 if (!cap_segstate) {
150 fprintf(stderr, "kvm error: missing PVR setting capability\n");
151 return -ENOSYS;
155 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
156 if (ret) {
157 return ret;
160 sregs.pvr = cenv->spr[SPR_PVR];
161 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
164 /* Set up a shared TLB array with KVM */
165 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
167 CPUPPCState *env = &cpu->env;
168 CPUState *cs = CPU(cpu);
169 struct kvm_book3e_206_tlb_params params = {};
170 struct kvm_config_tlb cfg = {};
171 unsigned int entries = 0;
172 int ret, i;
174 if (!kvm_enabled() ||
175 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
176 return 0;
179 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
181 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
182 params.tlb_sizes[i] = booke206_tlb_size(env, i);
183 params.tlb_ways[i] = booke206_tlb_ways(env, i);
184 entries += params.tlb_sizes[i];
187 assert(entries == env->nb_tlb);
188 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
190 env->tlb_dirty = true;
192 cfg.array = (uintptr_t)env->tlb.tlbm;
193 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
194 cfg.params = (uintptr_t)&params;
195 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
197 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
198 if (ret < 0) {
199 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
200 __func__, strerror(-ret));
201 return ret;
204 env->kvm_sw_tlb = true;
205 return 0;
209 #if defined(TARGET_PPC64)
210 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
211 struct kvm_ppc_smmu_info *info)
213 CPUPPCState *env = &cpu->env;
214 CPUState *cs = CPU(cpu);
216 memset(info, 0, sizeof(*info));
218 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
219 * need to "guess" what the supported page sizes are.
221 * For that to work we make a few assumptions:
223 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
224 * KVM which only supports 4K and 16M pages, but supports them
225 * regardless of the backing store characteritics. We also don't
226 * support 1T segments.
228 * This is safe as if HV KVM ever supports that capability or PR
229 * KVM grows supports for more page/segment sizes, those versions
230 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
231 * will not hit this fallback
233 * - Else we are running HV KVM. This means we only support page
234 * sizes that fit in the backing store. Additionally we only
235 * advertize 64K pages if the processor is ARCH 2.06 and we assume
236 * P7 encodings for the SLB and hash table. Here too, we assume
237 * support for any newer processor will mean a kernel that
238 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
239 * this fallback.
241 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
242 /* No flags */
243 info->flags = 0;
244 info->slb_size = 64;
246 /* Standard 4k base page size segment */
247 info->sps[0].page_shift = 12;
248 info->sps[0].slb_enc = 0;
249 info->sps[0].enc[0].page_shift = 12;
250 info->sps[0].enc[0].pte_enc = 0;
252 /* Standard 16M large page size segment */
253 info->sps[1].page_shift = 24;
254 info->sps[1].slb_enc = SLB_VSID_L;
255 info->sps[1].enc[0].page_shift = 24;
256 info->sps[1].enc[0].pte_enc = 0;
257 } else {
258 int i = 0;
260 /* HV KVM has backing store size restrictions */
261 info->flags = KVM_PPC_PAGE_SIZES_REAL;
263 if (env->mmu_model & POWERPC_MMU_1TSEG) {
264 info->flags |= KVM_PPC_1T_SEGMENTS;
267 if (env->mmu_model == POWERPC_MMU_2_06 ||
268 env->mmu_model == POWERPC_MMU_2_07) {
269 info->slb_size = 32;
270 } else {
271 info->slb_size = 64;
274 /* Standard 4k base page size segment */
275 info->sps[i].page_shift = 12;
276 info->sps[i].slb_enc = 0;
277 info->sps[i].enc[0].page_shift = 12;
278 info->sps[i].enc[0].pte_enc = 0;
279 i++;
281 /* 64K on MMU 2.06 and later */
282 if (env->mmu_model == POWERPC_MMU_2_06 ||
283 env->mmu_model == POWERPC_MMU_2_07) {
284 info->sps[i].page_shift = 16;
285 info->sps[i].slb_enc = 0x110;
286 info->sps[i].enc[0].page_shift = 16;
287 info->sps[i].enc[0].pte_enc = 1;
288 i++;
291 /* Standard 16M large page size segment */
292 info->sps[i].page_shift = 24;
293 info->sps[i].slb_enc = SLB_VSID_L;
294 info->sps[i].enc[0].page_shift = 24;
295 info->sps[i].enc[0].pte_enc = 0;
299 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
301 CPUState *cs = CPU(cpu);
302 int ret;
304 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
305 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
306 if (ret == 0) {
307 return;
311 kvm_get_fallback_smmu_info(cpu, info);
/*
 * Return the page size backing @mem_path: the filesystem block size when the
 * path is on hugetlbfs, the normal page size otherwise.
 *
 * Exits the process with status 1 if statfs() fails for a reason other than
 * EINTR.
 */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int rc;

    /* Retry for as long as the call is merely interrupted. */
    for (;;) {
        rc = statfs(mem_path, &fs);
        if (rc == 0 || errno != EINTR) {
            break;
        }
    }

    if (rc != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type == HUGETLBFS_MAGIC) {
        /* It's hugepage, return the huge page size */
        return fs.f_bsize;
    }

    /* Explicit mempath, but it's ordinary pages */
    return getpagesize();
}
341 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
342 * may or may not name the same files / on the same filesystem now as
343 * when we actually open and map them. Iterate over the file
344 * descriptors instead, and use qemu_fd_getpagesize().
346 static int find_max_supported_pagesize(Object *obj, void *opaque)
348 char *mem_path;
349 long *hpsize_min = opaque;
351 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
352 mem_path = object_property_get_str(obj, "mem-path", NULL);
353 if (mem_path) {
354 long hpsize = gethugepagesize(mem_path);
355 if (hpsize < *hpsize_min) {
356 *hpsize_min = hpsize;
358 } else {
359 *hpsize_min = getpagesize();
363 return 0;
366 static long getrampagesize(void)
368 long hpsize = LONG_MAX;
369 Object *memdev_root;
371 if (mem_path) {
372 return gethugepagesize(mem_path);
375 /* it's possible we have memory-backend objects with
376 * hugepage-backed RAM. these may get mapped into system
377 * address space via -numa parameters or memory hotplug
378 * hooks. we want to take these into account, but we
379 * also want to make sure these supported hugepage
380 * sizes are applicable across the entire range of memory
381 * we may boot from, so we take the min across all
382 * backends, and assume normal pages in cases where a
383 * backend isn't backed by hugepages.
385 memdev_root = object_resolve_path("/objects", NULL);
386 if (!memdev_root) {
387 return getpagesize();
390 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
392 if (hpsize == LONG_MAX || hpsize == getpagesize()) {
393 return getpagesize();
396 /* If NUMA is disabled or the NUMA nodes are not backed with a
397 * memory-backend, then there is at least one node using "normal"
398 * RAM. And since normal RAM has not been configured with "-mem-path"
399 * (what we've checked earlier here already), we can not use huge pages!
401 if (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL) {
402 static bool warned;
403 if (!warned) {
404 error_report("Huge page support disabled (n/a for main memory).");
405 warned = true;
407 return getpagesize();
410 return hpsize;
413 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
415 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
416 return true;
419 return (1ul << shift) <= rampgsize;
422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
424 static struct kvm_ppc_smmu_info smmu_info;
425 static bool has_smmu_info;
426 CPUPPCState *env = &cpu->env;
427 long rampagesize;
428 int iq, ik, jq, jk;
430 /* We only handle page sizes for 64-bit server guests for now */
431 if (!(env->mmu_model & POWERPC_MMU_64)) {
432 return;
435 /* Collect MMU info from kernel if not already */
436 if (!has_smmu_info) {
437 kvm_get_smmu_info(cpu, &smmu_info);
438 has_smmu_info = true;
441 rampagesize = getrampagesize();
443 /* Convert to QEMU form */
444 memset(&env->sps, 0, sizeof(env->sps));
446 /* If we have HV KVM, we need to forbid CI large pages if our
447 * host page size is smaller than 64K.
449 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
450 env->ci_large_pages = getpagesize() >= 0x10000;
454 * XXX This loop should be an entry wide AND of the capabilities that
455 * the selected CPU has with the capabilities that KVM supports.
457 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
458 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
459 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
461 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
462 ksps->page_shift)) {
463 continue;
465 qsps->page_shift = ksps->page_shift;
466 qsps->slb_enc = ksps->slb_enc;
467 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
468 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
469 ksps->enc[jk].page_shift)) {
470 continue;
472 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
473 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
474 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
475 break;
478 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
479 break;
482 env->slb_nr = smmu_info.slb_size;
483 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
484 env->mmu_model &= ~POWERPC_MMU_1TSEG;
487 #else /* defined (TARGET_PPC64) */
489 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
493 #endif /* !defined (TARGET_PPC64) */
495 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
497 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
500 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
501 * book3s supports only 1 watchpoint, so array size
502 * of 4 is sufficient for now.
504 #define MAX_HW_BKPTS 4
506 static struct HWBreakpoint {
507 target_ulong addr;
508 int type;
509 } hw_debug_points[MAX_HW_BKPTS];
511 static CPUWatchpoint hw_watchpoint;
513 /* Default there is no breakpoint and watchpoint supported */
514 static int max_hw_breakpoint;
515 static int max_hw_watchpoint;
516 static int nb_hw_breakpoint;
517 static int nb_hw_watchpoint;
519 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
521 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
522 max_hw_breakpoint = 2;
523 max_hw_watchpoint = 2;
526 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
527 fprintf(stderr, "Error initializing h/w breakpoints\n");
528 return;
532 int kvm_arch_init_vcpu(CPUState *cs)
534 PowerPCCPU *cpu = POWERPC_CPU(cs);
535 CPUPPCState *cenv = &cpu->env;
536 int ret;
538 /* Gather server mmu info from KVM and update the CPU state */
539 kvm_fixup_page_sizes(cpu);
541 /* Synchronize sregs with kvm */
542 ret = kvm_arch_sync_sregs(cpu);
543 if (ret) {
544 if (ret == -EINVAL) {
545 error_report("Register sync failed... If you're using kvm-hv.ko,"
546 " only \"-cpu host\" is possible");
548 return ret;
551 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
553 /* Some targets support access to KVM's guest TLB. */
554 switch (cenv->mmu_model) {
555 case POWERPC_MMU_BOOKE206:
556 ret = kvm_booke206_tlb_init(cpu);
557 break;
558 default:
559 break;
562 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
563 kvmppc_hw_debug_points_init(cenv);
565 return ret;
568 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
570 CPUPPCState *env = &cpu->env;
571 CPUState *cs = CPU(cpu);
572 struct kvm_dirty_tlb dirty_tlb;
573 unsigned char *bitmap;
574 int ret;
576 if (!env->kvm_sw_tlb) {
577 return;
580 bitmap = g_malloc((env->nb_tlb + 7) / 8);
581 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
583 dirty_tlb.bitmap = (uintptr_t)bitmap;
584 dirty_tlb.num_dirty = env->nb_tlb;
586 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
587 if (ret) {
588 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
589 __func__, strerror(-ret));
592 g_free(bitmap);
595 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
597 PowerPCCPU *cpu = POWERPC_CPU(cs);
598 CPUPPCState *env = &cpu->env;
599 union {
600 uint32_t u32;
601 uint64_t u64;
602 } val;
603 struct kvm_one_reg reg = {
604 .id = id,
605 .addr = (uintptr_t) &val,
607 int ret;
609 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
610 if (ret != 0) {
611 trace_kvm_failed_spr_get(spr, strerror(errno));
612 } else {
613 switch (id & KVM_REG_SIZE_MASK) {
614 case KVM_REG_SIZE_U32:
615 env->spr[spr] = val.u32;
616 break;
618 case KVM_REG_SIZE_U64:
619 env->spr[spr] = val.u64;
620 break;
622 default:
623 /* Don't handle this size yet */
624 abort();
629 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
631 PowerPCCPU *cpu = POWERPC_CPU(cs);
632 CPUPPCState *env = &cpu->env;
633 union {
634 uint32_t u32;
635 uint64_t u64;
636 } val;
637 struct kvm_one_reg reg = {
638 .id = id,
639 .addr = (uintptr_t) &val,
641 int ret;
643 switch (id & KVM_REG_SIZE_MASK) {
644 case KVM_REG_SIZE_U32:
645 val.u32 = env->spr[spr];
646 break;
648 case KVM_REG_SIZE_U64:
649 val.u64 = env->spr[spr];
650 break;
652 default:
653 /* Don't handle this size yet */
654 abort();
657 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
658 if (ret != 0) {
659 trace_kvm_failed_spr_set(spr, strerror(errno));
663 static int kvm_put_fp(CPUState *cs)
665 PowerPCCPU *cpu = POWERPC_CPU(cs);
666 CPUPPCState *env = &cpu->env;
667 struct kvm_one_reg reg;
668 int i;
669 int ret;
671 if (env->insns_flags & PPC_FLOAT) {
672 uint64_t fpscr = env->fpscr;
673 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
675 reg.id = KVM_REG_PPC_FPSCR;
676 reg.addr = (uintptr_t)&fpscr;
677 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
678 if (ret < 0) {
679 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
680 return ret;
683 for (i = 0; i < 32; i++) {
684 uint64_t vsr[2];
686 #ifdef HOST_WORDS_BIGENDIAN
687 vsr[0] = float64_val(env->fpr[i]);
688 vsr[1] = env->vsr[i];
689 #else
690 vsr[0] = env->vsr[i];
691 vsr[1] = float64_val(env->fpr[i]);
692 #endif
693 reg.addr = (uintptr_t) &vsr;
694 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
696 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
697 if (ret < 0) {
698 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
699 i, strerror(errno));
700 return ret;
705 if (env->insns_flags & PPC_ALTIVEC) {
706 reg.id = KVM_REG_PPC_VSCR;
707 reg.addr = (uintptr_t)&env->vscr;
708 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
709 if (ret < 0) {
710 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
711 return ret;
714 for (i = 0; i < 32; i++) {
715 reg.id = KVM_REG_PPC_VR(i);
716 reg.addr = (uintptr_t)&env->avr[i];
717 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
718 if (ret < 0) {
719 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
720 return ret;
725 return 0;
728 static int kvm_get_fp(CPUState *cs)
730 PowerPCCPU *cpu = POWERPC_CPU(cs);
731 CPUPPCState *env = &cpu->env;
732 struct kvm_one_reg reg;
733 int i;
734 int ret;
736 if (env->insns_flags & PPC_FLOAT) {
737 uint64_t fpscr;
738 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
740 reg.id = KVM_REG_PPC_FPSCR;
741 reg.addr = (uintptr_t)&fpscr;
742 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
743 if (ret < 0) {
744 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
745 return ret;
746 } else {
747 env->fpscr = fpscr;
750 for (i = 0; i < 32; i++) {
751 uint64_t vsr[2];
753 reg.addr = (uintptr_t) &vsr;
754 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
756 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
757 if (ret < 0) {
758 DPRINTF("Unable to get %s%d from KVM: %s\n",
759 vsx ? "VSR" : "FPR", i, strerror(errno));
760 return ret;
761 } else {
762 #ifdef HOST_WORDS_BIGENDIAN
763 env->fpr[i] = vsr[0];
764 if (vsx) {
765 env->vsr[i] = vsr[1];
767 #else
768 env->fpr[i] = vsr[1];
769 if (vsx) {
770 env->vsr[i] = vsr[0];
772 #endif
777 if (env->insns_flags & PPC_ALTIVEC) {
778 reg.id = KVM_REG_PPC_VSCR;
779 reg.addr = (uintptr_t)&env->vscr;
780 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
781 if (ret < 0) {
782 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
783 return ret;
786 for (i = 0; i < 32; i++) {
787 reg.id = KVM_REG_PPC_VR(i);
788 reg.addr = (uintptr_t)&env->avr[i];
789 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
790 if (ret < 0) {
791 DPRINTF("Unable to get VR%d from KVM: %s\n",
792 i, strerror(errno));
793 return ret;
798 return 0;
801 #if defined(TARGET_PPC64)
802 static int kvm_get_vpa(CPUState *cs)
804 PowerPCCPU *cpu = POWERPC_CPU(cs);
805 CPUPPCState *env = &cpu->env;
806 struct kvm_one_reg reg;
807 int ret;
809 reg.id = KVM_REG_PPC_VPA_ADDR;
810 reg.addr = (uintptr_t)&env->vpa_addr;
811 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
812 if (ret < 0) {
813 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
814 return ret;
817 assert((uintptr_t)&env->slb_shadow_size
818 == ((uintptr_t)&env->slb_shadow_addr + 8));
819 reg.id = KVM_REG_PPC_VPA_SLB;
820 reg.addr = (uintptr_t)&env->slb_shadow_addr;
821 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
822 if (ret < 0) {
823 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
824 strerror(errno));
825 return ret;
828 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
829 reg.id = KVM_REG_PPC_VPA_DTL;
830 reg.addr = (uintptr_t)&env->dtl_addr;
831 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
832 if (ret < 0) {
833 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
834 strerror(errno));
835 return ret;
838 return 0;
841 static int kvm_put_vpa(CPUState *cs)
843 PowerPCCPU *cpu = POWERPC_CPU(cs);
844 CPUPPCState *env = &cpu->env;
845 struct kvm_one_reg reg;
846 int ret;
848 /* SLB shadow or DTL can't be registered unless a master VPA is
849 * registered. That means when restoring state, if a VPA *is*
850 * registered, we need to set that up first. If not, we need to
851 * deregister the others before deregistering the master VPA */
852 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
854 if (env->vpa_addr) {
855 reg.id = KVM_REG_PPC_VPA_ADDR;
856 reg.addr = (uintptr_t)&env->vpa_addr;
857 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
858 if (ret < 0) {
859 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
860 return ret;
864 assert((uintptr_t)&env->slb_shadow_size
865 == ((uintptr_t)&env->slb_shadow_addr + 8));
866 reg.id = KVM_REG_PPC_VPA_SLB;
867 reg.addr = (uintptr_t)&env->slb_shadow_addr;
868 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
869 if (ret < 0) {
870 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
871 return ret;
874 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
875 reg.id = KVM_REG_PPC_VPA_DTL;
876 reg.addr = (uintptr_t)&env->dtl_addr;
877 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
878 if (ret < 0) {
879 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
880 strerror(errno));
881 return ret;
884 if (!env->vpa_addr) {
885 reg.id = KVM_REG_PPC_VPA_ADDR;
886 reg.addr = (uintptr_t)&env->vpa_addr;
887 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
888 if (ret < 0) {
889 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
890 return ret;
894 return 0;
896 #endif /* TARGET_PPC64 */
898 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
900 CPUPPCState *env = &cpu->env;
901 struct kvm_sregs sregs;
902 int i;
904 sregs.pvr = env->spr[SPR_PVR];
906 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
908 /* Sync SLB */
909 #ifdef TARGET_PPC64
910 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
911 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
912 if (env->slb[i].esid & SLB_ESID_V) {
913 sregs.u.s.ppc64.slb[i].slbe |= i;
915 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
917 #endif
919 /* Sync SRs */
920 for (i = 0; i < 16; i++) {
921 sregs.u.s.ppc32.sr[i] = env->sr[i];
924 /* Sync BATs */
925 for (i = 0; i < 8; i++) {
926 /* Beware. We have to swap upper and lower bits here */
927 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
928 | env->DBAT[1][i];
929 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
930 | env->IBAT[1][i];
933 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
936 int kvm_arch_put_registers(CPUState *cs, int level)
938 PowerPCCPU *cpu = POWERPC_CPU(cs);
939 CPUPPCState *env = &cpu->env;
940 struct kvm_regs regs;
941 int ret;
942 int i;
944 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
945 if (ret < 0) {
946 return ret;
949 regs.ctr = env->ctr;
950 regs.lr = env->lr;
951 regs.xer = cpu_read_xer(env);
952 regs.msr = env->msr;
953 regs.pc = env->nip;
955 regs.srr0 = env->spr[SPR_SRR0];
956 regs.srr1 = env->spr[SPR_SRR1];
958 regs.sprg0 = env->spr[SPR_SPRG0];
959 regs.sprg1 = env->spr[SPR_SPRG1];
960 regs.sprg2 = env->spr[SPR_SPRG2];
961 regs.sprg3 = env->spr[SPR_SPRG3];
962 regs.sprg4 = env->spr[SPR_SPRG4];
963 regs.sprg5 = env->spr[SPR_SPRG5];
964 regs.sprg6 = env->spr[SPR_SPRG6];
965 regs.sprg7 = env->spr[SPR_SPRG7];
967 regs.pid = env->spr[SPR_BOOKE_PID];
969 for (i = 0;i < 32; i++)
970 regs.gpr[i] = env->gpr[i];
972 regs.cr = 0;
973 for (i = 0; i < 8; i++) {
974 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
977 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
978 if (ret < 0)
979 return ret;
981 kvm_put_fp(cs);
983 if (env->tlb_dirty) {
984 kvm_sw_tlb_put(cpu);
985 env->tlb_dirty = false;
988 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
989 ret = kvmppc_put_books_sregs(cpu);
990 if (ret < 0) {
991 return ret;
995 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
996 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
999 if (cap_one_reg) {
1000 int i;
1002 /* We deliberately ignore errors here, for kernels which have
1003 * the ONE_REG calls, but don't support the specific
1004 * registers, there's a reasonable chance things will still
1005 * work, at least until we try to migrate. */
1006 for (i = 0; i < 1024; i++) {
1007 uint64_t id = env->spr_cb[i].one_reg_id;
1009 if (id != 0) {
1010 kvm_put_one_spr(cs, id, i);
1014 #ifdef TARGET_PPC64
1015 if (msr_ts) {
1016 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1017 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1019 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1020 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1022 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1023 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1024 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1025 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1026 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1027 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1028 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1029 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1030 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1031 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1034 if (cap_papr) {
1035 if (kvm_put_vpa(cs) < 0) {
1036 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1040 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1041 #endif /* TARGET_PPC64 */
1044 return ret;
1047 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1049 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1052 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1054 CPUPPCState *env = &cpu->env;
1055 struct kvm_sregs sregs;
1056 int ret;
1058 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1059 if (ret < 0) {
1060 return ret;
1063 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1064 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1065 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1066 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1067 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1068 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1069 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1070 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1071 env->spr[SPR_DECR] = sregs.u.e.dec;
1072 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1073 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1074 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1077 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1078 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1079 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1080 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1081 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1082 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1085 if (sregs.u.e.features & KVM_SREGS_E_64) {
1086 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1089 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1090 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1093 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1094 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1095 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1096 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1097 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1098 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1099 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1100 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1101 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1102 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1103 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1104 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1105 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1106 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1107 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1108 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1109 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1110 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1111 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1112 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1113 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1114 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1115 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1116 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1117 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1118 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1119 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1120 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1121 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1122 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1123 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1124 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1125 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1127 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1128 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1129 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1130 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1131 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1132 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1133 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1136 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1137 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1138 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1141 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1142 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1143 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1144 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1145 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1149 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1150 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1151 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1152 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1153 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1154 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1155 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1156 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1157 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1158 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1159 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1162 if (sregs.u.e.features & KVM_SREGS_EXP) {
1163 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1166 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1167 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1168 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1171 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1172 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1173 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1174 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1176 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1177 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1178 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1182 return 0;
1185 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1187 CPUPPCState *env = &cpu->env;
1188 struct kvm_sregs sregs;
1189 int ret;
1190 int i;
1192 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1193 if (ret < 0) {
1194 return ret;
1197 if (!env->external_htab) {
1198 ppc_store_sdr1(env, sregs.u.s.sdr1);
1201 /* Sync SLB */
1202 #ifdef TARGET_PPC64
1204 * The packed SLB array we get from KVM_GET_SREGS only contains
1205 * information about valid entries. So we flush our internal copy
1206 * to get rid of stale ones, then put all valid SLB entries back
1207 * in.
1209 memset(env->slb, 0, sizeof(env->slb));
1210 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1211 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1212 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1214 * Only restore valid entries
1216 if (rb & SLB_ESID_V) {
1217 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1220 #endif
1222 /* Sync SRs */
1223 for (i = 0; i < 16; i++) {
1224 env->sr[i] = sregs.u.s.ppc32.sr[i];
1227 /* Sync BATs */
1228 for (i = 0; i < 8; i++) {
1229 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1230 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1231 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1232 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1235 return 0;
1238 int kvm_arch_get_registers(CPUState *cs)
1240 PowerPCCPU *cpu = POWERPC_CPU(cs);
1241 CPUPPCState *env = &cpu->env;
1242 struct kvm_regs regs;
1243 uint32_t cr;
1244 int i, ret;
1246 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1247 if (ret < 0)
1248 return ret;
1250 cr = regs.cr;
1251 for (i = 7; i >= 0; i--) {
1252 env->crf[i] = cr & 15;
1253 cr >>= 4;
1256 env->ctr = regs.ctr;
1257 env->lr = regs.lr;
1258 cpu_write_xer(env, regs.xer);
1259 env->msr = regs.msr;
1260 env->nip = regs.pc;
1262 env->spr[SPR_SRR0] = regs.srr0;
1263 env->spr[SPR_SRR1] = regs.srr1;
1265 env->spr[SPR_SPRG0] = regs.sprg0;
1266 env->spr[SPR_SPRG1] = regs.sprg1;
1267 env->spr[SPR_SPRG2] = regs.sprg2;
1268 env->spr[SPR_SPRG3] = regs.sprg3;
1269 env->spr[SPR_SPRG4] = regs.sprg4;
1270 env->spr[SPR_SPRG5] = regs.sprg5;
1271 env->spr[SPR_SPRG6] = regs.sprg6;
1272 env->spr[SPR_SPRG7] = regs.sprg7;
1274 env->spr[SPR_BOOKE_PID] = regs.pid;
1276 for (i = 0;i < 32; i++)
1277 env->gpr[i] = regs.gpr[i];
1279 kvm_get_fp(cs);
1281 if (cap_booke_sregs) {
1282 ret = kvmppc_get_booke_sregs(cpu);
1283 if (ret < 0) {
1284 return ret;
1288 if (cap_segstate) {
1289 ret = kvmppc_get_books_sregs(cpu);
1290 if (ret < 0) {
1291 return ret;
1295 if (cap_hior) {
1296 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1299 if (cap_one_reg) {
1300 int i;
1302 /* We deliberately ignore errors here, for kernels which have
1303 * the ONE_REG calls, but don't support the specific
1304 * registers, there's a reasonable chance things will still
1305 * work, at least until we try to migrate. */
1306 for (i = 0; i < 1024; i++) {
1307 uint64_t id = env->spr_cb[i].one_reg_id;
1309 if (id != 0) {
1310 kvm_get_one_spr(cs, id, i);
1314 #ifdef TARGET_PPC64
1315 if (msr_ts) {
1316 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1317 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1319 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1320 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1322 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1323 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1324 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1325 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1326 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1327 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1328 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1329 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1330 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1331 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1334 if (cap_papr) {
1335 if (kvm_get_vpa(cs) < 0) {
1336 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1340 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1341 #endif
1344 return 0;
1347 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1349 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1351 if (irq != PPC_INTERRUPT_EXT) {
1352 return 0;
1355 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1356 return 0;
1359 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1361 return 0;
/* Pick the interrupt input pin constant that matches the build target */
#ifdef TARGET_PPCEMB
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1372 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1374 PowerPCCPU *cpu = POWERPC_CPU(cs);
1375 CPUPPCState *env = &cpu->env;
1376 int r;
1377 unsigned irq;
1379 qemu_mutex_lock_iothread();
1381 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1382 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1383 if (!cap_interrupt_level &&
1384 run->ready_for_interrupt_injection &&
1385 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1386 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1388 /* For now KVM disregards the 'irq' argument. However, in the
1389 * future KVM could cache it in-kernel to avoid a heavyweight exit
1390 * when reading the UIC.
1392 irq = KVM_INTERRUPT_SET;
1394 DPRINTF("injected interrupt %d\n", irq);
1395 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1396 if (r < 0) {
1397 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1400 /* Always wake up soon in case the interrupt was level based */
1401 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1402 (NANOSECONDS_PER_SECOND / 50));
1405 /* We don't know if there are more interrupts pending after this. However,
1406 * the guest will return to userspace in the course of handling this one
1407 * anyways, so we will get a chance to deliver the rest. */
1409 qemu_mutex_unlock_iothread();
1412 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1414 return MEMTXATTRS_UNSPECIFIED;
1417 int kvm_arch_process_async_events(CPUState *cs)
1419 return cs->halted;
1422 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1424 CPUState *cs = CPU(cpu);
1425 CPUPPCState *env = &cpu->env;
1427 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1428 cs->halted = 1;
1429 cs->exception_index = EXCP_HLT;
1432 return 0;
1435 /* map dcr access to existing qemu dcr emulation */
1436 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1438 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1439 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1441 return 0;
1444 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1446 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1447 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1449 return 0;
1452 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1454 /* Mixed endian case is not handled */
1455 uint32_t sc = debug_inst_opcode;
1457 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1458 sizeof(sc), 0) ||
1459 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1460 return -EINVAL;
1463 return 0;
1466 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1468 uint32_t sc;
1470 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1471 sc != debug_inst_opcode ||
1472 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1473 sizeof(sc), 1)) {
1474 return -EINVAL;
1477 return 0;
1480 static int find_hw_breakpoint(target_ulong addr, int type)
1482 int n;
1484 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1485 <= ARRAY_SIZE(hw_debug_points));
1487 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1488 if (hw_debug_points[n].addr == addr &&
1489 hw_debug_points[n].type == type) {
1490 return n;
1494 return -1;
1497 static int find_hw_watchpoint(target_ulong addr, int *flag)
1499 int n;
1501 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1502 if (n >= 0) {
1503 *flag = BP_MEM_ACCESS;
1504 return n;
1507 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1508 if (n >= 0) {
1509 *flag = BP_MEM_WRITE;
1510 return n;
1513 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1514 if (n >= 0) {
1515 *flag = BP_MEM_READ;
1516 return n;
1519 return -1;
1522 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1523 target_ulong len, int type)
1525 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1526 return -ENOBUFS;
1529 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1530 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1532 switch (type) {
1533 case GDB_BREAKPOINT_HW:
1534 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1535 return -ENOBUFS;
1538 if (find_hw_breakpoint(addr, type) >= 0) {
1539 return -EEXIST;
1542 nb_hw_breakpoint++;
1543 break;
1545 case GDB_WATCHPOINT_WRITE:
1546 case GDB_WATCHPOINT_READ:
1547 case GDB_WATCHPOINT_ACCESS:
1548 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1549 return -ENOBUFS;
1552 if (find_hw_breakpoint(addr, type) >= 0) {
1553 return -EEXIST;
1556 nb_hw_watchpoint++;
1557 break;
1559 default:
1560 return -ENOSYS;
1563 return 0;
1566 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1567 target_ulong len, int type)
1569 int n;
1571 n = find_hw_breakpoint(addr, type);
1572 if (n < 0) {
1573 return -ENOENT;
1576 switch (type) {
1577 case GDB_BREAKPOINT_HW:
1578 nb_hw_breakpoint--;
1579 break;
1581 case GDB_WATCHPOINT_WRITE:
1582 case GDB_WATCHPOINT_READ:
1583 case GDB_WATCHPOINT_ACCESS:
1584 nb_hw_watchpoint--;
1585 break;
1587 default:
1588 return -ENOSYS;
1590 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1592 return 0;
1595 void kvm_arch_remove_all_hw_breakpoints(void)
1597 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1600 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1602 int n;
1604 /* Software Breakpoint updates */
1605 if (kvm_sw_breakpoints_active(cs)) {
1606 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1609 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1610 <= ARRAY_SIZE(hw_debug_points));
1611 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1613 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1614 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1615 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1616 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1617 switch (hw_debug_points[n].type) {
1618 case GDB_BREAKPOINT_HW:
1619 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1620 break;
1621 case GDB_WATCHPOINT_WRITE:
1622 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1623 break;
1624 case GDB_WATCHPOINT_READ:
1625 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1626 break;
1627 case GDB_WATCHPOINT_ACCESS:
1628 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1629 KVMPPC_DEBUG_WATCH_READ;
1630 break;
1631 default:
1632 cpu_abort(cs, "Unsupported breakpoint type\n");
1634 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1639 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1641 CPUState *cs = CPU(cpu);
1642 CPUPPCState *env = &cpu->env;
1643 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1644 int handle = 0;
1645 int n;
1646 int flag = 0;
1648 if (cs->singlestep_enabled) {
1649 handle = 1;
1650 } else if (arch_info->status) {
1651 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1652 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1653 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1654 if (n >= 0) {
1655 handle = 1;
1657 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1658 KVMPPC_DEBUG_WATCH_WRITE)) {
1659 n = find_hw_watchpoint(arch_info->address, &flag);
1660 if (n >= 0) {
1661 handle = 1;
1662 cs->watchpoint_hit = &hw_watchpoint;
1663 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1664 hw_watchpoint.flags = flag;
1668 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1669 handle = 1;
1670 } else {
1671 /* QEMU is not able to handle debug exception, so inject
1672 * program exception to guest;
1673 * Yes program exception NOT debug exception !!
1674 * When QEMU is using debug resources then debug exception must
1675 * be always set. To achieve this we set MSR_DE and also set
1676 * MSRP_DEP so guest cannot change MSR_DE.
1677 * When emulating debug resource for guest we want guest
1678 * to control MSR_DE (enable/disable debug interrupt on need).
1679 * Supporting both configurations are NOT possible.
1680 * So the result is that we cannot share debug resources
1681 * between QEMU and Guest on BOOKE architecture.
1682 * In the current design QEMU gets the priority over guest,
1683 * this means that if QEMU is using debug resources then guest
1684 * cannot use them;
1685 * For software breakpoint QEMU uses a privileged instruction;
1686 * So there cannot be any reason that we are here for guest
1687 * set debug exception, only possibility is guest executed a
1688 * privileged / illegal instruction and that's why we are
1689 * injecting a program interrupt.
1692 cpu_synchronize_state(cs);
1693 /* env->nip is PC, so increment this by 4 to use
1694 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1696 env->nip += 4;
1697 cs->exception_index = POWERPC_EXCP_PROGRAM;
1698 env->error_code = POWERPC_EXCP_INVAL;
1699 ppc_cpu_do_interrupt(cs);
1702 return handle;
1705 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1707 PowerPCCPU *cpu = POWERPC_CPU(cs);
1708 CPUPPCState *env = &cpu->env;
1709 int ret;
1711 qemu_mutex_lock_iothread();
1713 switch (run->exit_reason) {
1714 case KVM_EXIT_DCR:
1715 if (run->dcr.is_write) {
1716 DPRINTF("handle dcr write\n");
1717 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1718 } else {
1719 DPRINTF("handle dcr read\n");
1720 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1722 break;
1723 case KVM_EXIT_HLT:
1724 DPRINTF("handle halt\n");
1725 ret = kvmppc_handle_halt(cpu);
1726 break;
1727 #if defined(TARGET_PPC64)
1728 case KVM_EXIT_PAPR_HCALL:
1729 DPRINTF("handle PAPR hypercall\n");
1730 run->papr_hcall.ret = spapr_hypercall(cpu,
1731 run->papr_hcall.nr,
1732 run->papr_hcall.args);
1733 ret = 0;
1734 break;
1735 #endif
1736 case KVM_EXIT_EPR:
1737 DPRINTF("handle epr\n");
1738 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1739 ret = 0;
1740 break;
1741 case KVM_EXIT_WATCHDOG:
1742 DPRINTF("handle watchdog expiry\n");
1743 watchdog_perform_action();
1744 ret = 0;
1745 break;
1747 case KVM_EXIT_DEBUG:
1748 DPRINTF("handle debug exception\n");
1749 if (kvm_handle_debug(cpu, run)) {
1750 ret = EXCP_DEBUG;
1751 break;
1753 /* re-enter, this exception was guest-internal */
1754 ret = 0;
1755 break;
1757 default:
1758 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1759 ret = -1;
1760 break;
1763 qemu_mutex_unlock_iothread();
1764 return ret;
1767 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1769 CPUState *cs = CPU(cpu);
1770 uint32_t bits = tsr_bits;
1771 struct kvm_one_reg reg = {
1772 .id = KVM_REG_PPC_OR_TSR,
1773 .addr = (uintptr_t) &bits,
1776 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1779 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1782 CPUState *cs = CPU(cpu);
1783 uint32_t bits = tsr_bits;
1784 struct kvm_one_reg reg = {
1785 .id = KVM_REG_PPC_CLEAR_TSR,
1786 .addr = (uintptr_t) &bits,
1789 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1792 int kvmppc_set_tcr(PowerPCCPU *cpu)
1794 CPUState *cs = CPU(cpu);
1795 CPUPPCState *env = &cpu->env;
1796 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1798 struct kvm_one_reg reg = {
1799 .id = KVM_REG_PPC_TCR,
1800 .addr = (uintptr_t) &tcr,
1803 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1806 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1808 CPUState *cs = CPU(cpu);
1809 int ret;
1811 if (!kvm_enabled()) {
1812 return -1;
1815 if (!cap_ppc_watchdog) {
1816 printf("warning: KVM does not support watchdog");
1817 return -1;
1820 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1821 if (ret < 0) {
1822 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1823 __func__, strerror(-ret));
1824 return ret;
1827 return ret;
/*
 * Scan /proc/cpuinfo for the first line that starts with 'field' and
 * copy that whole line into 'value' (at most 'len' bytes, via pstrcpy).
 *
 * Returns 0 when such a line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int found = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (!fp) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        if (!*line) {
            /* Stop on a line that reads back as empty */
            break;
        }
    }

    fclose(fp);

    return found;
}
1858 uint32_t kvmppc_get_tbfreq(void)
1860 char line[512];
1861 char *ns;
1862 uint32_t retval = NANOSECONDS_PER_SECOND;
1864 if (read_cpuinfo("timebase", line, sizeof(line))) {
1865 return retval;
1868 if (!(ns = strchr(line, ':'))) {
1869 return retval;
1872 ns++;
1874 return atoi(ns);
1877 bool kvmppc_get_host_serial(char **value)
1879 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1880 NULL);
1883 bool kvmppc_get_host_model(char **value)
1885 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1888 /* Try to find a device tree node for a CPU with clock-frequency property */
1889 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1891 struct dirent *dirp;
1892 DIR *dp;
1894 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1895 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1896 return -1;
1899 buf[0] = '\0';
1900 while ((dirp = readdir(dp)) != NULL) {
1901 FILE *f;
1902 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1903 dirp->d_name);
1904 f = fopen(buf, "r");
1905 if (f) {
1906 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1907 fclose(f);
1908 break;
1910 buf[0] = '\0';
1912 closedir(dp);
1913 if (buf[0] == '\0') {
1914 printf("Unknown host!\n");
1915 return -1;
1918 return 0;
/*
 * Read a big-endian integer property from a device-tree file.
 *
 * Returns the value converted to host byte order when the file holds
 * exactly 4 or 8 bytes, -1 (all bits set) when the file cannot be
 * opened, and 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *fp = fopen(filename, "rb");

    if (!fp) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1948 /* Read a CPU node property from the host device tree that's a single
1949 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1950 * (can't find or open the property, or doesn't understand the
1951 * format) */
1952 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1954 char buf[PATH_MAX], *tmp;
1955 uint64_t val;
1957 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1958 return -1;
1961 tmp = g_strdup_printf("%s/%s", buf, propname);
1962 val = kvmppc_read_int_dt(tmp);
1963 g_free(tmp);
1965 return val;
/* Return the host CPU node's "clock-frequency" device-tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/*
 * Return the host CPU node's "ibm,vmx" device-tree property, truncated
 * to 32 bits.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
/*
 * Return the host CPU node's "ibm,dfp" device-tree property, truncated
 * to 32 bits.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
1983 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1985 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1986 CPUState *cs = CPU(cpu);
1988 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1989 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1990 return 0;
1993 return 1;
1996 int kvmppc_get_hasidle(CPUPPCState *env)
1998 struct kvm_ppc_pvinfo pvinfo;
2000 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2001 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2002 return 1;
2005 return 0;
2008 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2010 uint32_t *hc = (uint32_t*)buf;
2011 struct kvm_ppc_pvinfo pvinfo;
2013 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2014 memcpy(buf, pvinfo.hcall, buf_len);
2015 return 0;
2019 * Fallback to always fail hypercalls regardless of endianness:
2021 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2022 * li r3, -1
2023 * b .+8 (becomes nop in wrong endian)
2024 * bswap32(li r3, -1)
2027 hc[0] = cpu_to_be32(0x08000048);
2028 hc[1] = cpu_to_be32(0x3860ffff);
2029 hc[2] = cpu_to_be32(0x48000008);
2030 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2032 return 1;
2035 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2037 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2040 void kvmppc_enable_logical_ci_hcalls(void)
2043 * FIXME: it would be nice if we could detect the cases where
2044 * we're using a device which requires the in kernel
2045 * implementation of these hcalls, but the kernel lacks them and
2046 * produce a warning.
2048 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2049 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2052 void kvmppc_enable_set_mode_hcall(void)
2054 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2057 void kvmppc_set_papr(PowerPCCPU *cpu)
2059 CPUState *cs = CPU(cpu);
2060 int ret;
2062 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2063 if (ret) {
2064 error_report("This vCPU type or KVM version does not support PAPR");
2065 exit(1);
2068 /* Update the capability flag so we sync the right information
2069 * with kvm */
2070 cap_papr = 1;
2073 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2075 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2078 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2080 CPUState *cs = CPU(cpu);
2081 int ret;
2083 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2084 if (ret && mpic_proxy) {
2085 error_report("This KVM version does not support EPR");
2086 exit(1);
2090 int kvmppc_smt_threads(void)
2092 return cap_ppc_smt ? cap_ppc_smt : 1;
2095 #ifdef TARGET_PPC64
2096 off_t kvmppc_alloc_rma(void **rma)
2098 off_t size;
2099 int fd;
2100 struct kvm_allocate_rma ret;
2102 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2103 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2104 * not necessary on this hardware
2105 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2107 * FIXME: We should allow the user to force contiguous RMA
2108 * allocation in the cap_ppc_rma==1 case.
2110 if (cap_ppc_rma < 2) {
2111 return 0;
2114 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2115 if (fd < 0) {
2116 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2117 strerror(errno));
2118 return -1;
2121 size = MIN(ret.rma_size, 256ul << 20);
2123 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2124 if (*rma == MAP_FAILED) {
2125 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2126 return -1;
2129 return size;
2132 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2134 struct kvm_ppc_smmu_info info;
2135 long rampagesize, best_page_shift;
2136 int i;
2138 if (cap_ppc_rma >= 2) {
2139 return current_size;
2142 /* Find the largest hardware supported page size that's less than
2143 * or equal to the (logical) backing page size of guest RAM */
2144 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2145 rampagesize = getrampagesize();
2146 best_page_shift = 0;
2148 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2149 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2151 if (!sps->page_shift) {
2152 continue;
2155 if ((sps->page_shift > best_page_shift)
2156 && ((1UL << sps->page_shift) <= rampagesize)) {
2157 best_page_shift = sps->page_shift;
2161 return MIN(current_size,
2162 1ULL << (best_page_shift + hash_shift - 7));
2164 #endif
2166 bool kvmppc_spapr_use_multitce(void)
2168 return cap_spapr_multitce;
2171 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2172 bool need_vfio)
2174 struct kvm_create_spapr_tce args = {
2175 .liobn = liobn,
2176 .window_size = window_size,
2178 long len;
2179 int fd;
2180 void *table;
2182 /* Must set fd to -1 so we don't try to munmap when called for
2183 * destroying the table, which the upper layers -will- do
2185 *pfd = -1;
2186 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2187 return NULL;
2190 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2191 if (fd < 0) {
2192 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2193 liobn);
2194 return NULL;
2197 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2198 /* FIXME: round this up to page size */
2200 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2201 if (table == MAP_FAILED) {
2202 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2203 liobn);
2204 close(fd);
2205 return NULL;
2208 *pfd = fd;
2209 return table;
/*
 * Unmap a TCE table created by kvmppc_create_spapr_tce() and close its
 * file descriptor.
 *
 * Returns -1 when fd is negative (nothing was created), otherwise 0.
 * A failing munmap()/close() is only reported on stderr; the table is
 * then deliberately leaked and 0 is still returned.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the line so the message does not run into later output */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2231 int kvmppc_reset_htab(int shift_hint)
2233 uint32_t shift = shift_hint;
2235 if (!kvm_enabled()) {
2236 /* Full emulation, tell caller to allocate htab itself */
2237 return 0;
2239 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2240 int ret;
2241 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2242 if (ret == -ENOTTY) {
2243 /* At least some versions of PR KVM advertise the
2244 * capability, but don't implement the ioctl(). Oops.
2245 * Return 0 so that we allocate the htab in qemu, as is
2246 * correct for PR. */
2247 return 0;
2248 } else if (ret < 0) {
2249 return ret;
2251 return shift;
2254 /* We have a kernel that predates the htab reset calls. For PR
2255 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2256 * this era, it has allocated a 16MB fixed size hash table
2257 * already. Kernels of this era have the GET_PVINFO capability
2258 * only on PR, so we use this hack to determine the right
2259 * answer */
2260 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2261 /* PR - tell caller to allocate htab */
2262 return 0;
2263 } else {
2264 /* HV - assume 16MB kernel allocated htab */
2265 return 24;
2269 static inline uint32_t mfpvr(void)
2271 uint32_t pvr;
2273 asm ("mfpvr %0"
2274 : "=r"(pvr));
2275 return pvr;
/* Set the bits of 'flags' in *word when 'on' is true, clear them otherwise. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2287 static void kvmppc_host_cpu_initfn(Object *obj)
2289 assert(kvm_enabled());
2292 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2294 DeviceClass *dc = DEVICE_CLASS(oc);
2295 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2296 uint32_t vmx = kvmppc_get_vmx();
2297 uint32_t dfp = kvmppc_get_dfp();
2298 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2299 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2301 /* Now fix up the class with information we can query from the host */
2302 pcc->pvr = mfpvr();
2304 if (vmx != -1) {
2305 /* Only override when we know what the host supports */
2306 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2307 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2309 if (dfp != -1) {
2310 /* Only override when we know what the host supports */
2311 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2314 if (dcache_size != -1) {
2315 pcc->l1_dcache_size = dcache_size;
2318 if (icache_size != -1) {
2319 pcc->l1_icache_size = icache_size;
2322 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2323 dc->cannot_destroy_with_object_finalize_yet = true;
2326 bool kvmppc_has_cap_epr(void)
2328 return cap_epr;
2331 bool kvmppc_has_cap_htab_fd(void)
2333 return cap_htab_fd;
2336 bool kvmppc_has_cap_fixup_hcalls(void)
2338 return cap_fixup_hcalls;
2341 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2343 ObjectClass *oc = OBJECT_CLASS(pcc);
2345 while (oc && !object_class_is_abstract(oc)) {
2346 oc = object_class_get_parent(oc);
2348 assert(oc);
2350 return POWERPC_CPU_CLASS(oc);
2353 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2355 uint32_t host_pvr = mfpvr();
2356 PowerPCCPUClass *pvr_pcc;
2358 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2359 if (pvr_pcc == NULL) {
2360 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2363 return pvr_pcc;
2366 #if defined(TARGET_PPC64)
2367 static void spapr_cpu_core_host_initfn(Object *obj)
2369 sPAPRCPUCore *core = SPAPR_CPU_CORE(obj);
2370 char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host");
2371 ObjectClass *oc = object_class_by_name(name);
2373 g_assert(oc);
2374 g_free((void *)name);
2375 core->cpu_class = oc;
2377 #endif
2379 static int kvm_ppc_register_host_cpu_type(void)
2381 TypeInfo type_info = {
2382 .name = TYPE_HOST_POWERPC_CPU,
2383 .instance_init = kvmppc_host_cpu_initfn,
2384 .class_init = kvmppc_host_cpu_class_init,
2386 PowerPCCPUClass *pvr_pcc;
2387 DeviceClass *dc;
2389 pvr_pcc = kvm_ppc_get_host_cpu_class();
2390 if (pvr_pcc == NULL) {
2391 return -1;
2393 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2394 type_register(&type_info);
2396 #if defined(TARGET_PPC64)
2397 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2398 type_info.parent = TYPE_SPAPR_CPU_CORE,
2399 type_info.instance_size = sizeof(sPAPRCPUCore),
2400 type_info.instance_init = spapr_cpu_core_host_initfn,
2401 type_info.class_init = NULL;
2402 type_register(&type_info);
2403 g_free((void *)type_info.name);
2404 type_info.instance_size = 0;
2405 type_info.instance_init = NULL;
2406 #endif
2408 /* Register generic family CPU class for a family */
2409 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2410 dc = DEVICE_CLASS(pvr_pcc);
2411 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2412 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2413 type_register(&type_info);
2415 return 0;
2418 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2420 struct kvm_rtas_token_args args = {
2421 .token = token,
2424 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2425 return -ENOENT;
2428 strncpy(args.name, function, sizeof(args.name));
2430 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2433 int kvmppc_get_htab_fd(bool write)
2435 struct kvm_get_htab_fd s = {
2436 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2437 .start_index = 0,
2440 if (!cap_htab_fd) {
2441 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2442 return -1;
2445 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2448 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2450 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2451 uint8_t buf[bufsize];
2452 ssize_t rc;
2454 do {
2455 rc = read(fd, buf, bufsize);
2456 if (rc < 0) {
2457 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2458 strerror(errno));
2459 return rc;
2460 } else if (rc) {
2461 uint8_t *buffer = buf;
2462 ssize_t n = rc;
2463 while (n) {
2464 struct kvm_get_htab_header *head =
2465 (struct kvm_get_htab_header *) buffer;
2466 size_t chunksize = sizeof(*head) +
2467 HASH_PTE_SIZE_64 * head->n_valid;
2469 qemu_put_be32(f, head->index);
2470 qemu_put_be16(f, head->n_valid);
2471 qemu_put_be16(f, head->n_invalid);
2472 qemu_put_buffer(f, (void *)(head + 1),
2473 HASH_PTE_SIZE_64 * head->n_valid);
2475 buffer += chunksize;
2476 n -= chunksize;
2479 } while ((rc != 0)
2480 && ((max_ns < 0)
2481 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2483 return (rc == 0) ? 1 : 0;
2486 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2487 uint16_t n_valid, uint16_t n_invalid)
2489 struct kvm_get_htab_header *buf;
2490 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2491 ssize_t rc;
2493 buf = alloca(chunksize);
2494 buf->index = index;
2495 buf->n_valid = n_valid;
2496 buf->n_invalid = n_invalid;
2498 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2500 rc = write(fd, buf, chunksize);
2501 if (rc < 0) {
2502 fprintf(stderr, "Error writing KVM hash table: %s\n",
2503 strerror(errno));
2504 return rc;
2506 if (rc != chunksize) {
2507 /* We should never get a short write on a single chunk */
2508 fprintf(stderr, "Short write, restoring KVM hash table\n");
2509 return -1;
2511 return 0;
2514 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2516 return true;
2519 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2521 return 1;
/*
 * Architecture hook for a SIGBUS.  Neither @code nor @addr is inspected;
 * the function always returns 1.
 */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
2529 void kvm_arch_init_irq_routing(KVMState *s)
2533 struct kvm_get_htab_buf {
2534 struct kvm_get_htab_header header;
2536 * We require one extra byte for read
2538 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2541 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2543 int htab_fd;
2544 struct kvm_get_htab_fd ghf;
2545 struct kvm_get_htab_buf *hpte_buf;
2547 ghf.flags = 0;
2548 ghf.start_index = pte_index;
2549 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2550 if (htab_fd < 0) {
2551 goto error_out;
2554 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2556 * Read the hpte group
2558 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2559 goto out_close;
2562 close(htab_fd);
2563 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2565 out_close:
2566 g_free(hpte_buf);
2567 close(htab_fd);
2568 error_out:
2569 return 0;
2572 void kvmppc_hash64_free_pteg(uint64_t token)
2574 struct kvm_get_htab_buf *htab_buf;
2576 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2577 hpte);
2578 g_free(htab_buf);
2579 return;
2582 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2583 target_ulong pte0, target_ulong pte1)
2585 int htab_fd;
2586 struct kvm_get_htab_fd ghf;
2587 struct kvm_get_htab_buf hpte_buf;
2589 ghf.flags = 0;
2590 ghf.start_index = 0; /* Ignored */
2591 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2592 if (htab_fd < 0) {
2593 goto error_out;
2596 hpte_buf.header.n_valid = 1;
2597 hpte_buf.header.n_invalid = 0;
2598 hpte_buf.header.index = pte_index;
2599 hpte_buf.hpte[0] = pte0;
2600 hpte_buf.hpte[1] = pte1;
2602 * Write the hpte entry.
2603 * CAUTION: write() has the warn_unused_result attribute. Hence we
2604 * need to check the return value, even though we do nothing.
2606 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2607 goto out_close;
2610 out_close:
2611 close(htab_fd);
2612 return;
2614 error_out:
2615 return;
2618 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2619 uint64_t address, uint32_t data, PCIDevice *dev)
2621 return 0;
/*
 * Map MSI data to a GSI number: the result is the low 16 bits of @data.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t gsi_mask = 0xffff;

    return data & gsi_mask;
}
2629 int kvmppc_enable_hwrng(void)
2631 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2632 return -1;
2635 return kvmppc_enable_hcall(kvm_state, H_RANDOM);