spapr: Add ibm,processor-radix-AP-encodings to the device tree
target/ppc/kvm.c (blob 9dc2f7fa5578f0f0541f011e28296c5fe1b182a2)
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #if defined(TARGET_PPC64)
50 #include "hw/ppc/spapr_cpu_core.h"
51 #endif
52 #include "elf.h"
53 #include "sysemu/kvm_int.h"
55 //#define DEBUG_KVM
57 #ifdef DEBUG_KVM
58 #define DPRINTF(fmt, ...) \
59 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
60 #else
61 #define DPRINTF(fmt, ...) \
62 do { } while (0)
63 #endif
65 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
67 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
68 KVM_CAP_LAST_INFO
71 static int cap_interrupt_unset = false;
72 static int cap_interrupt_level = false;
73 static int cap_segstate;
74 static int cap_booke_sregs;
75 static int cap_ppc_smt;
76 static int cap_ppc_rma;
77 static int cap_spapr_tce;
78 static int cap_spapr_tce_64;
79 static int cap_spapr_multitce;
80 static int cap_spapr_vfio;
81 static int cap_hior;
82 static int cap_one_reg;
83 static int cap_epr;
84 static int cap_ppc_watchdog;
85 static int cap_papr;
86 static int cap_htab_fd;
87 static int cap_fixup_hcalls;
88 static int cap_htm; /* Hardware transactional memory support */
90 static uint32_t debug_inst_opcode;
92 /* XXX We have a race condition where we actually have a level triggered
93 * interrupt, but the infrastructure can't expose that yet, so the guest
94 * takes but ignores it, goes to sleep and never gets notified that there's
95 * still an interrupt pending.
97 * As a quick workaround, let's just wake up again 20 ms after we injected
98 * an interrupt. That way we can ensure that we're always reinjecting
99 * interrupts in case the guest swallowed them.
101 static QEMUTimer *idle_timer;
103 static void kvm_kick_cpu(void *opaque)
105 PowerPCCPU *cpu = opaque;
107 qemu_cpu_kick(CPU(cpu));
110 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
111 * should only be used for fallback tests - generally we should use
112 * explicit capabilities for the features we want, rather than
113 * assuming what is/isn't available depending on the KVM variant. */
114 static bool kvmppc_is_pr(KVMState *ks)
116 /* Assume KVM-PR if the GET_PVINFO capability is available */
117 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
120 static int kvm_ppc_register_host_cpu_type(void);
122 int kvm_arch_init(MachineState *ms, KVMState *s)
124 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
125 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
126 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
127 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
128 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
129 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
130 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
131 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
132 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
133 cap_spapr_vfio = false;
134 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
135 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
136 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
137 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
138 /* Note: we don't set cap_papr here, because this capability is
139 * only activated later by kvmppc_set_papr() */
140 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
141 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
142 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
144 if (!cap_interrupt_level) {
145 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
146 "VM to stall at times!\n");
149 kvm_ppc_register_host_cpu_type();
151 return 0;
154 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
156 return 0;
159 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
161 CPUPPCState *cenv = &cpu->env;
162 CPUState *cs = CPU(cpu);
163 struct kvm_sregs sregs;
164 int ret;
166 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
167 /* What we're really trying to say is "if we're on BookE, we use
168 the native PVR for now". This is the only sane way to check
169 it, though it may mislead users into thinking they can run
170 BookE guests on BookS. Let's hope nobody dares enough :) */
171 return 0;
172 } else {
173 if (!cap_segstate) {
174 fprintf(stderr, "kvm error: missing PVR setting capability\n");
175 return -ENOSYS;
179 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
180 if (ret) {
181 return ret;
184 sregs.pvr = cenv->spr[SPR_PVR];
185 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
188 /* Set up a shared TLB array with KVM */
189 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
191 CPUPPCState *env = &cpu->env;
192 CPUState *cs = CPU(cpu);
193 struct kvm_book3e_206_tlb_params params = {};
194 struct kvm_config_tlb cfg = {};
195 unsigned int entries = 0;
196 int ret, i;
198 if (!kvm_enabled() ||
199 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
200 return 0;
203 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
205 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
206 params.tlb_sizes[i] = booke206_tlb_size(env, i);
207 params.tlb_ways[i] = booke206_tlb_ways(env, i);
208 entries += params.tlb_sizes[i];
211 assert(entries == env->nb_tlb);
212 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
214 env->tlb_dirty = true;
216 cfg.array = (uintptr_t)env->tlb.tlbm;
217 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
218 cfg.params = (uintptr_t)&params;
219 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
221 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
222 if (ret < 0) {
223 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
224 __func__, strerror(-ret));
225 return ret;
228 env->kvm_sw_tlb = true;
229 return 0;
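/*
 * Note on the shared TLB: KVM_CAP_SW_TLB hands the kernel a pointer to
 * QEMU's own ppcmas_tlb_t array (cfg.array above), so both sides operate
 * on the same in-memory copy of the guest TLB.  Whenever QEMU modifies
 * entries locally it sets env->tlb_dirty, and kvm_sw_tlb_put() below then
 * tells KVM to re-read every entry via KVM_DIRTY_TLB.
 */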
233 #if defined(TARGET_PPC64)
234 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
235 struct kvm_ppc_smmu_info *info)
237 CPUPPCState *env = &cpu->env;
238 CPUState *cs = CPU(cpu);
240 memset(info, 0, sizeof(*info));
242 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
243 * need to "guess" what the supported page sizes are.
245 * For that to work we make a few assumptions:
247 * - Check whether we are running "PR" KVM which only supports 4K
248 * and 16M pages, but supports them regardless of the backing
249 * store characteristics. We also don't support 1T segments.
251 * This is safe because if HV KVM ever supports that capability or PR
252 * KVM grows support for more page/segment sizes, those versions
253 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
254 * will not hit this fallback
256 * - Else we are running HV KVM. This means we only support page
257 * sizes that fit in the backing store. Additionally we only
258 * advertise 64K pages if the processor is ARCH 2.06 and we assume
259 * P7 encodings for the SLB and hash table. Here too, we assume
260 * support for any newer processor will mean a kernel that
261 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
262 * this fallback.
264 if (kvmppc_is_pr(cs->kvm_state)) {
265 /* No flags */
266 info->flags = 0;
267 info->slb_size = 64;
269 /* Standard 4k base page size segment */
270 info->sps[0].page_shift = 12;
271 info->sps[0].slb_enc = 0;
272 info->sps[0].enc[0].page_shift = 12;
273 info->sps[0].enc[0].pte_enc = 0;
275 /* Standard 16M large page size segment */
276 info->sps[1].page_shift = 24;
277 info->sps[1].slb_enc = SLB_VSID_L;
278 info->sps[1].enc[0].page_shift = 24;
279 info->sps[1].enc[0].pte_enc = 0;
280 } else {
281 int i = 0;
283 /* HV KVM has backing store size restrictions */
284 info->flags = KVM_PPC_PAGE_SIZES_REAL;
286 if (env->mmu_model & POWERPC_MMU_1TSEG) {
287 info->flags |= KVM_PPC_1T_SEGMENTS;
290 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
291 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
292 info->slb_size = 32;
293 } else {
294 info->slb_size = 64;
297 /* Standard 4k base page size segment */
298 info->sps[i].page_shift = 12;
299 info->sps[i].slb_enc = 0;
300 info->sps[i].enc[0].page_shift = 12;
301 info->sps[i].enc[0].pte_enc = 0;
302 i++;
304 /* 64K on MMU 2.06 and later */
305 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
306 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
307 info->sps[i].page_shift = 16;
308 info->sps[i].slb_enc = 0x110;
309 info->sps[i].enc[0].page_shift = 16;
310 info->sps[i].enc[0].pte_enc = 1;
311 i++;
314 /* Standard 16M large page size segment */
315 info->sps[i].page_shift = 24;
316 info->sps[i].slb_enc = SLB_VSID_L;
317 info->sps[i].enc[0].page_shift = 24;
318 info->sps[i].enc[0].pte_enc = 0;
322 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
324 CPUState *cs = CPU(cpu);
325 int ret;
327 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
328 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
329 if (ret == 0) {
330 return;
334 kvm_get_fallback_smmu_info(cpu, info);
337 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
339 KVMState *s = KVM_STATE(current_machine->accelerator);
340 struct ppc_radix_page_info *radix_page_info;
341 struct kvm_ppc_rmmu_info rmmu_info;
342 int i;
344 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
345 return NULL;
347 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
348 return NULL;
350 radix_page_info = g_malloc0(sizeof(*radix_page_info));
351 radix_page_info->count = 0;
352 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
353 if (rmmu_info.ap_encodings[i]) {
354 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
355 radix_page_info->count++;
358 return radix_page_info;
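/*
 * Each non-zero word in ap_encodings[] describes one radix page size.
 * Going by the KVM API documentation for KVM_PPC_GET_RMMU_INFO (worth
 * re-checking against the host kernel headers), the low bits hold the
 * log2 of the page size and the top three bits hold the AP field value
 * used in radix PTEs, so a consumer could decode an entry roughly as:
 *
 *     uint32_t enc   = radix_page_info->entries[i];
 *     uint32_t shift = enc & 0x3f;     // log2 of the page size
 *     uint32_t ap    = enc >> 29;      // AP encoding for the PTE
 *
 * This is the raw format the sPAPR code can then expose through the
 * ibm,processor-radix-AP-encodings device tree property.
 */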
361 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
363 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
364 return true;
367 return (1ul << shift) <= rampgsize;
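/*
 * Example: with KVM_PPC_PAGE_SIZES_REAL set and 64 KiB backing pages
 * (rampgsize == 0x10000), a 16 MiB page size (shift == 24) is rejected
 * while 4 KiB and 64 KiB page sizes are accepted.
 */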
370 static long max_cpu_page_size;
372 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
374 static struct kvm_ppc_smmu_info smmu_info;
375 static bool has_smmu_info;
376 CPUPPCState *env = &cpu->env;
377 int iq, ik, jq, jk;
378 bool has_64k_pages = false;
380 /* We only handle page sizes for 64-bit server guests for now */
381 if (!(env->mmu_model & POWERPC_MMU_64)) {
382 return;
385 /* Collect MMU info from kernel if not already */
386 if (!has_smmu_info) {
387 kvm_get_smmu_info(cpu, &smmu_info);
388 has_smmu_info = true;
391 if (!max_cpu_page_size) {
392 max_cpu_page_size = qemu_getrampagesize();
395 /* Convert to QEMU form */
396 memset(&env->sps, 0, sizeof(env->sps));
398 /* If we have HV KVM, we need to forbid CI large pages if our
399 * host page size is smaller than 64K.
401 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
402 env->ci_large_pages = getpagesize() >= 0x10000;
406 * XXX This loop should be an entry wide AND of the capabilities that
407 * the selected CPU has with the capabilities that KVM supports.
409 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
410 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
411 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
413 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
414 ksps->page_shift)) {
415 continue;
417 qsps->page_shift = ksps->page_shift;
418 qsps->slb_enc = ksps->slb_enc;
419 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
420 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
421 ksps->enc[jk].page_shift)) {
422 continue;
424 if (ksps->enc[jk].page_shift == 16) {
425 has_64k_pages = true;
427 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
428 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
429 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
430 break;
433 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
434 break;
437 env->slb_nr = smmu_info.slb_size;
438 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
439 env->mmu_model &= ~POWERPC_MMU_1TSEG;
441 if (!has_64k_pages) {
442 env->mmu_model &= ~POWERPC_MMU_64K;
446 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
448 Object *mem_obj = object_resolve_path(obj_path, NULL);
449 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
450 long pagesize;
452 if (mempath) {
453 pagesize = qemu_mempath_getpagesize(mempath);
454 } else {
455 pagesize = getpagesize();
458 return pagesize >= max_cpu_page_size;
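/*
 * A backend with an explicit mem-path (e.g. hugetlbfs) can use larger
 * pages than anonymous memory, so compare the backend's actual page size
 * against max_cpu_page_size, the guest RAM page size cached by
 * kvm_fixup_page_sizes() above.
 */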
461 #else /* defined (TARGET_PPC64) */
463 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
467 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
469 return true;
472 #endif /* !defined (TARGET_PPC64) */
474 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
476 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
479 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
480 * book3s supports only 1 watchpoint, so an array size
481 * of 4 is sufficient for now.
483 #define MAX_HW_BKPTS 4
485 static struct HWBreakpoint {
486 target_ulong addr;
487 int type;
488 } hw_debug_points[MAX_HW_BKPTS];
490 static CPUWatchpoint hw_watchpoint;
492 /* By default no breakpoints or watchpoints are supported */
493 static int max_hw_breakpoint;
494 static int max_hw_watchpoint;
495 static int nb_hw_breakpoint;
496 static int nb_hw_watchpoint;
498 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
500 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
501 max_hw_breakpoint = 2;
502 max_hw_watchpoint = 2;
505 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
506 fprintf(stderr, "Error initializing h/w breakpoints\n");
507 return;
511 int kvm_arch_init_vcpu(CPUState *cs)
513 PowerPCCPU *cpu = POWERPC_CPU(cs);
514 CPUPPCState *cenv = &cpu->env;
515 int ret;
517 /* Gather server mmu info from KVM and update the CPU state */
518 kvm_fixup_page_sizes(cpu);
520 /* Synchronize sregs with kvm */
521 ret = kvm_arch_sync_sregs(cpu);
522 if (ret) {
523 if (ret == -EINVAL) {
524 error_report("Register sync failed... If you're using kvm-hv.ko,"
525 " only \"-cpu host\" is possible");
527 return ret;
530 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
532 switch (cenv->mmu_model) {
533 case POWERPC_MMU_BOOKE206:
534 /* This target supports access to KVM's guest TLB */
535 ret = kvm_booke206_tlb_init(cpu);
536 break;
537 case POWERPC_MMU_2_07:
538 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
539 /* KVM-HV has transactional memory on POWER8 also without the
540 * KVM_CAP_PPC_HTM extension, so enable it here instead as
541 * long as it's available to userspace on the host. */
542 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
543 cap_htm = true;
546 break;
547 default:
548 break;
551 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
552 kvmppc_hw_debug_points_init(cenv);
554 return ret;
557 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
559 CPUPPCState *env = &cpu->env;
560 CPUState *cs = CPU(cpu);
561 struct kvm_dirty_tlb dirty_tlb;
562 unsigned char *bitmap;
563 int ret;
565 if (!env->kvm_sw_tlb) {
566 return;
569 bitmap = g_malloc((env->nb_tlb + 7) / 8);
570 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
572 dirty_tlb.bitmap = (uintptr_t)bitmap;
573 dirty_tlb.num_dirty = env->nb_tlb;
575 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
576 if (ret) {
577 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
578 __func__, strerror(-ret));
581 g_free(bitmap);
584 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
586 PowerPCCPU *cpu = POWERPC_CPU(cs);
587 CPUPPCState *env = &cpu->env;
588 union {
589 uint32_t u32;
590 uint64_t u64;
591 } val;
592 struct kvm_one_reg reg = {
593 .id = id,
594 .addr = (uintptr_t) &val,
596 int ret;
598 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
599 if (ret != 0) {
600 trace_kvm_failed_spr_get(spr, strerror(errno));
601 } else {
602 switch (id & KVM_REG_SIZE_MASK) {
603 case KVM_REG_SIZE_U32:
604 env->spr[spr] = val.u32;
605 break;
607 case KVM_REG_SIZE_U64:
608 env->spr[spr] = val.u64;
609 break;
611 default:
612 /* Don't handle this size yet */
613 abort();
618 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
620 PowerPCCPU *cpu = POWERPC_CPU(cs);
621 CPUPPCState *env = &cpu->env;
622 union {
623 uint32_t u32;
624 uint64_t u64;
625 } val;
626 struct kvm_one_reg reg = {
627 .id = id,
628 .addr = (uintptr_t) &val,
630 int ret;
632 switch (id & KVM_REG_SIZE_MASK) {
633 case KVM_REG_SIZE_U32:
634 val.u32 = env->spr[spr];
635 break;
637 case KVM_REG_SIZE_U64:
638 val.u64 = env->spr[spr];
639 break;
641 default:
642 /* Don't handle this size yet */
643 abort();
646 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
647 if (ret != 0) {
648 trace_kvm_failed_spr_set(spr, strerror(errno));
652 static int kvm_put_fp(CPUState *cs)
654 PowerPCCPU *cpu = POWERPC_CPU(cs);
655 CPUPPCState *env = &cpu->env;
656 struct kvm_one_reg reg;
657 int i;
658 int ret;
660 if (env->insns_flags & PPC_FLOAT) {
661 uint64_t fpscr = env->fpscr;
662 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
664 reg.id = KVM_REG_PPC_FPSCR;
665 reg.addr = (uintptr_t)&fpscr;
666 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
667 if (ret < 0) {
668 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
669 return ret;
672 for (i = 0; i < 32; i++) {
673 uint64_t vsr[2];
675 #ifdef HOST_WORDS_BIGENDIAN
676 vsr[0] = float64_val(env->fpr[i]);
677 vsr[1] = env->vsr[i];
678 #else
679 vsr[0] = env->vsr[i];
680 vsr[1] = float64_val(env->fpr[i]);
681 #endif
682 reg.addr = (uintptr_t) &vsr;
683 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
685 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
686 if (ret < 0) {
687 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
688 i, strerror(errno));
689 return ret;
694 if (env->insns_flags & PPC_ALTIVEC) {
695 reg.id = KVM_REG_PPC_VSCR;
696 reg.addr = (uintptr_t)&env->vscr;
697 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
698 if (ret < 0) {
699 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
700 return ret;
703 for (i = 0; i < 32; i++) {
704 reg.id = KVM_REG_PPC_VR(i);
705 reg.addr = (uintptr_t)&env->avr[i];
706 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
707 if (ret < 0) {
708 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
709 return ret;
714 return 0;
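/*
 * Layout note for the VSR transfers above: each VSR is passed to KVM as
 * two 64-bit doublewords.  The FPR contents are architecturally
 * doubleword 0 of the corresponding VSR, so the vsr[0]/vsr[1] ordering
 * is flipped on little-endian hosts to keep the FPR in the half of the
 * 128-bit image that the kernel expects.  kvm_get_fp() below applies the
 * inverse mapping.
 */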
717 static int kvm_get_fp(CPUState *cs)
719 PowerPCCPU *cpu = POWERPC_CPU(cs);
720 CPUPPCState *env = &cpu->env;
721 struct kvm_one_reg reg;
722 int i;
723 int ret;
725 if (env->insns_flags & PPC_FLOAT) {
726 uint64_t fpscr;
727 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
729 reg.id = KVM_REG_PPC_FPSCR;
730 reg.addr = (uintptr_t)&fpscr;
731 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
732 if (ret < 0) {
733 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
734 return ret;
735 } else {
736 env->fpscr = fpscr;
739 for (i = 0; i < 32; i++) {
740 uint64_t vsr[2];
742 reg.addr = (uintptr_t) &vsr;
743 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
745 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
746 if (ret < 0) {
747 DPRINTF("Unable to get %s%d from KVM: %s\n",
748 vsx ? "VSR" : "FPR", i, strerror(errno));
749 return ret;
750 } else {
751 #ifdef HOST_WORDS_BIGENDIAN
752 env->fpr[i] = vsr[0];
753 if (vsx) {
754 env->vsr[i] = vsr[1];
756 #else
757 env->fpr[i] = vsr[1];
758 if (vsx) {
759 env->vsr[i] = vsr[0];
761 #endif
766 if (env->insns_flags & PPC_ALTIVEC) {
767 reg.id = KVM_REG_PPC_VSCR;
768 reg.addr = (uintptr_t)&env->vscr;
769 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770 if (ret < 0) {
771 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
772 return ret;
775 for (i = 0; i < 32; i++) {
776 reg.id = KVM_REG_PPC_VR(i);
777 reg.addr = (uintptr_t)&env->avr[i];
778 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
779 if (ret < 0) {
780 DPRINTF("Unable to get VR%d from KVM: %s\n",
781 i, strerror(errno));
782 return ret;
787 return 0;
790 #if defined(TARGET_PPC64)
791 static int kvm_get_vpa(CPUState *cs)
793 PowerPCCPU *cpu = POWERPC_CPU(cs);
794 CPUPPCState *env = &cpu->env;
795 struct kvm_one_reg reg;
796 int ret;
798 reg.id = KVM_REG_PPC_VPA_ADDR;
799 reg.addr = (uintptr_t)&env->vpa_addr;
800 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
801 if (ret < 0) {
802 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
803 return ret;
806 assert((uintptr_t)&env->slb_shadow_size
807 == ((uintptr_t)&env->slb_shadow_addr + 8));
808 reg.id = KVM_REG_PPC_VPA_SLB;
809 reg.addr = (uintptr_t)&env->slb_shadow_addr;
810 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
811 if (ret < 0) {
812 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
813 strerror(errno));
814 return ret;
817 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
818 reg.id = KVM_REG_PPC_VPA_DTL;
819 reg.addr = (uintptr_t)&env->dtl_addr;
820 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
821 if (ret < 0) {
822 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
823 strerror(errno));
824 return ret;
827 return 0;
830 static int kvm_put_vpa(CPUState *cs)
832 PowerPCCPU *cpu = POWERPC_CPU(cs);
833 CPUPPCState *env = &cpu->env;
834 struct kvm_one_reg reg;
835 int ret;
837 /* SLB shadow or DTL can't be registered unless a master VPA is
838 * registered. That means when restoring state, if a VPA *is*
839 * registered, we need to set that up first. If not, we need to
840 * deregister the others before deregistering the master VPA */
841 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
843 if (env->vpa_addr) {
844 reg.id = KVM_REG_PPC_VPA_ADDR;
845 reg.addr = (uintptr_t)&env->vpa_addr;
846 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
847 if (ret < 0) {
848 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
849 return ret;
853 assert((uintptr_t)&env->slb_shadow_size
854 == ((uintptr_t)&env->slb_shadow_addr + 8));
855 reg.id = KVM_REG_PPC_VPA_SLB;
856 reg.addr = (uintptr_t)&env->slb_shadow_addr;
857 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
858 if (ret < 0) {
859 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
860 return ret;
863 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
864 reg.id = KVM_REG_PPC_VPA_DTL;
865 reg.addr = (uintptr_t)&env->dtl_addr;
866 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
867 if (ret < 0) {
868 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
869 strerror(errno));
870 return ret;
873 if (!env->vpa_addr) {
874 reg.id = KVM_REG_PPC_VPA_ADDR;
875 reg.addr = (uintptr_t)&env->vpa_addr;
876 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
877 if (ret < 0) {
878 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
879 return ret;
883 return 0;
885 #endif /* TARGET_PPC64 */
887 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
889 CPUPPCState *env = &cpu->env;
890 struct kvm_sregs sregs;
891 int i;
893 sregs.pvr = env->spr[SPR_PVR];
895 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
897 /* Sync SLB */
898 #ifdef TARGET_PPC64
899 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
900 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
901 if (env->slb[i].esid & SLB_ESID_V) {
902 sregs.u.s.ppc64.slb[i].slbe |= i;
904 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
906 #endif
908 /* Sync SRs */
909 for (i = 0; i < 16; i++) {
910 sregs.u.s.ppc32.sr[i] = env->sr[i];
913 /* Sync BATs */
914 for (i = 0; i < 8; i++) {
915 /* Beware. We have to swap upper and lower bits here */
916 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
917 | env->DBAT[1][i];
918 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
919 | env->IBAT[1][i];
922 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
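/*
 * The BAT packing above follows the sregs layout: each 64-bit dbat/ibat
 * entry carries the upper BAT register (BATU, kept in env->DBAT[0][i] /
 * env->IBAT[0][i]) in its high 32 bits and the lower BAT register (BATL,
 * kept in [1][i]) in its low 32 bits, e.g.
 *
 *     sregs.u.s.ppc32.dbat[0] == ((uint64_t)env->DBAT[0][0] << 32)
 *                                | env->DBAT[1][0];
 *
 * kvmppc_get_books_sregs() splits the values back out the same way.
 */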
925 int kvm_arch_put_registers(CPUState *cs, int level)
927 PowerPCCPU *cpu = POWERPC_CPU(cs);
928 CPUPPCState *env = &cpu->env;
929 struct kvm_regs regs;
930 int ret;
931 int i;
933 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
934 if (ret < 0) {
935 return ret;
938 regs.ctr = env->ctr;
939 regs.lr = env->lr;
940 regs.xer = cpu_read_xer(env);
941 regs.msr = env->msr;
942 regs.pc = env->nip;
944 regs.srr0 = env->spr[SPR_SRR0];
945 regs.srr1 = env->spr[SPR_SRR1];
947 regs.sprg0 = env->spr[SPR_SPRG0];
948 regs.sprg1 = env->spr[SPR_SPRG1];
949 regs.sprg2 = env->spr[SPR_SPRG2];
950 regs.sprg3 = env->spr[SPR_SPRG3];
951 regs.sprg4 = env->spr[SPR_SPRG4];
952 regs.sprg5 = env->spr[SPR_SPRG5];
953 regs.sprg6 = env->spr[SPR_SPRG6];
954 regs.sprg7 = env->spr[SPR_SPRG7];
956 regs.pid = env->spr[SPR_BOOKE_PID];
958 for (i = 0;i < 32; i++)
959 regs.gpr[i] = env->gpr[i];
961 regs.cr = 0;
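/* Pack the eight 4-bit CR fields into one 32-bit word: crf[0] (CR field 0)
 * lands in the most significant nibble (bits 28..31) and crf[7] in the
 * least significant one.  kvm_arch_get_registers() performs the inverse
 * unpacking. */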
962 for (i = 0; i < 8; i++) {
963 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
966 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
967 if (ret < 0)
968 return ret;
970 kvm_put_fp(cs);
972 if (env->tlb_dirty) {
973 kvm_sw_tlb_put(cpu);
974 env->tlb_dirty = false;
977 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
978 ret = kvmppc_put_books_sregs(cpu);
979 if (ret < 0) {
980 return ret;
984 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
985 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
988 if (cap_one_reg) {
989 int i;
991 /* We deliberately ignore errors here, for kernels which have
992 * the ONE_REG calls, but don't support the specific
993 * registers, there's a reasonable chance things will still
994 * work, at least until we try to migrate. */
995 for (i = 0; i < 1024; i++) {
996 uint64_t id = env->spr_cb[i].one_reg_id;
998 if (id != 0) {
999 kvm_put_one_spr(cs, id, i);
1003 #ifdef TARGET_PPC64
1004 if (msr_ts) {
1005 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1006 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1008 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1009 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1011 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1012 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1013 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1014 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1015 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1016 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1017 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1018 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1019 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1020 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1023 if (cap_papr) {
1024 if (kvm_put_vpa(cs) < 0) {
1025 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1029 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1030 #endif /* TARGET_PPC64 */
1033 return ret;
1036 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1038 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1041 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1043 CPUPPCState *env = &cpu->env;
1044 struct kvm_sregs sregs;
1045 int ret;
1047 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1048 if (ret < 0) {
1049 return ret;
1052 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1053 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1054 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1055 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1056 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1057 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1058 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1059 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1060 env->spr[SPR_DECR] = sregs.u.e.dec;
1061 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1062 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1063 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1066 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1067 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1068 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1069 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1070 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1071 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1074 if (sregs.u.e.features & KVM_SREGS_E_64) {
1075 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1078 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1079 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1082 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1083 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1084 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1085 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1086 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1087 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1088 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1089 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1090 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1091 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1092 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1093 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1094 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1095 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1096 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1097 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1098 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1099 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1100 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1101 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1102 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1103 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1104 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1105 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1106 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1107 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1108 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1109 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1110 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1111 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1112 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1113 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1114 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1116 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1117 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1118 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1119 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1120 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1121 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1122 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1125 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1126 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1127 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1130 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1131 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1132 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1133 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1134 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1138 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1139 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1140 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1141 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1142 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1143 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1144 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1145 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1146 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1147 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1148 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1151 if (sregs.u.e.features & KVM_SREGS_EXP) {
1152 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1155 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1156 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1157 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1160 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1161 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1162 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1163 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1165 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1166 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1167 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1171 return 0;
1174 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1176 CPUPPCState *env = &cpu->env;
1177 struct kvm_sregs sregs;
1178 int ret;
1179 int i;
1181 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1182 if (ret < 0) {
1183 return ret;
1186 if (!cpu->vhyp) {
1187 ppc_store_sdr1(env, sregs.u.s.sdr1);
1190 /* Sync SLB */
1191 #ifdef TARGET_PPC64
1193 * The packed SLB array we get from KVM_GET_SREGS only contains
1194 * information about valid entries. So we flush our internal copy
1195 * to get rid of stale ones, then put all valid SLB entries back
1196 * in.
1198 memset(env->slb, 0, sizeof(env->slb));
1199 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1200 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1201 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1203 * Only restore valid entries
1205 if (rb & SLB_ESID_V) {
1206 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1209 #endif
1211 /* Sync SRs */
1212 for (i = 0; i < 16; i++) {
1213 env->sr[i] = sregs.u.s.ppc32.sr[i];
1216 /* Sync BATs */
1217 for (i = 0; i < 8; i++) {
1218 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1219 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1220 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1221 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1224 return 0;
1227 int kvm_arch_get_registers(CPUState *cs)
1229 PowerPCCPU *cpu = POWERPC_CPU(cs);
1230 CPUPPCState *env = &cpu->env;
1231 struct kvm_regs regs;
1232 uint32_t cr;
1233 int i, ret;
1235 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1236 if (ret < 0)
1237 return ret;
1239 cr = regs.cr;
1240 for (i = 7; i >= 0; i--) {
1241 env->crf[i] = cr & 15;
1242 cr >>= 4;
1245 env->ctr = regs.ctr;
1246 env->lr = regs.lr;
1247 cpu_write_xer(env, regs.xer);
1248 env->msr = regs.msr;
1249 env->nip = regs.pc;
1251 env->spr[SPR_SRR0] = regs.srr0;
1252 env->spr[SPR_SRR1] = regs.srr1;
1254 env->spr[SPR_SPRG0] = regs.sprg0;
1255 env->spr[SPR_SPRG1] = regs.sprg1;
1256 env->spr[SPR_SPRG2] = regs.sprg2;
1257 env->spr[SPR_SPRG3] = regs.sprg3;
1258 env->spr[SPR_SPRG4] = regs.sprg4;
1259 env->spr[SPR_SPRG5] = regs.sprg5;
1260 env->spr[SPR_SPRG6] = regs.sprg6;
1261 env->spr[SPR_SPRG7] = regs.sprg7;
1263 env->spr[SPR_BOOKE_PID] = regs.pid;
1265 for (i = 0;i < 32; i++)
1266 env->gpr[i] = regs.gpr[i];
1268 kvm_get_fp(cs);
1270 if (cap_booke_sregs) {
1271 ret = kvmppc_get_booke_sregs(cpu);
1272 if (ret < 0) {
1273 return ret;
1277 if (cap_segstate) {
1278 ret = kvmppc_get_books_sregs(cpu);
1279 if (ret < 0) {
1280 return ret;
1284 if (cap_hior) {
1285 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1288 if (cap_one_reg) {
1289 int i;
1291 /* We deliberately ignore errors here, for kernels which have
1292 * the ONE_REG calls, but don't support the specific
1293 * registers, there's a reasonable chance things will still
1294 * work, at least until we try to migrate. */
1295 for (i = 0; i < 1024; i++) {
1296 uint64_t id = env->spr_cb[i].one_reg_id;
1298 if (id != 0) {
1299 kvm_get_one_spr(cs, id, i);
1303 #ifdef TARGET_PPC64
1304 if (msr_ts) {
1305 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1306 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1308 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1309 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1311 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1312 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1313 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1314 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1315 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1316 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1317 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1318 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1319 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1320 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1323 if (cap_papr) {
1324 if (kvm_get_vpa(cs) < 0) {
1325 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1329 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1330 #endif
1333 return 0;
1336 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1338 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1340 if (irq != PPC_INTERRUPT_EXT) {
1341 return 0;
1344 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1345 return 0;
1348 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1350 return 0;
1353 #if defined(TARGET_PPCEMB)
1354 #define PPC_INPUT_INT PPC40x_INPUT_INT
1355 #elif defined(TARGET_PPC64)
1356 #define PPC_INPUT_INT PPC970_INPUT_INT
1357 #else
1358 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1359 #endif
1361 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1363 PowerPCCPU *cpu = POWERPC_CPU(cs);
1364 CPUPPCState *env = &cpu->env;
1365 int r;
1366 unsigned irq;
1368 qemu_mutex_lock_iothread();
1370 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1371 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1372 if (!cap_interrupt_level &&
1373 run->ready_for_interrupt_injection &&
1374 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1375 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1377 /* For now KVM disregards the 'irq' argument. However, in the
1378 * future KVM could cache it in-kernel to avoid a heavyweight exit
1379 * when reading the UIC.
1381 irq = KVM_INTERRUPT_SET;
1383 DPRINTF("injected interrupt %d\n", irq);
1384 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1385 if (r < 0) {
1386 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1389 /* Always wake up soon in case the interrupt was level based */
1390 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1391 (NANOSECONDS_PER_SECOND / 50));
1394 /* We don't know if there are more interrupts pending after this. However,
1395 * the guest will return to userspace in the course of handling this one
1396 * anyway, so we will get a chance to deliver the rest. */
1398 qemu_mutex_unlock_iothread();
1401 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1403 return MEMTXATTRS_UNSPECIFIED;
1406 int kvm_arch_process_async_events(CPUState *cs)
1408 return cs->halted;
1411 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1413 CPUState *cs = CPU(cpu);
1414 CPUPPCState *env = &cpu->env;
1416 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1417 cs->halted = 1;
1418 cs->exception_index = EXCP_HLT;
1421 return 0;
1424 /* map dcr access to existing qemu dcr emulation */
1425 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1427 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1428 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1430 return 0;
1433 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1435 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1436 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1438 return 0;
1441 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1443 /* Mixed endian case is not handled */
1444 uint32_t sc = debug_inst_opcode;
1446 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1447 sizeof(sc), 0) ||
1448 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1449 return -EINVAL;
1452 return 0;
1455 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1457 uint32_t sc;
1459 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1460 sc != debug_inst_opcode ||
1461 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1462 sizeof(sc), 1)) {
1463 return -EINVAL;
1466 return 0;
1469 static int find_hw_breakpoint(target_ulong addr, int type)
1471 int n;
1473 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1474 <= ARRAY_SIZE(hw_debug_points));
1476 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1477 if (hw_debug_points[n].addr == addr &&
1478 hw_debug_points[n].type == type) {
1479 return n;
1483 return -1;
1486 static int find_hw_watchpoint(target_ulong addr, int *flag)
1488 int n;
1490 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1491 if (n >= 0) {
1492 *flag = BP_MEM_ACCESS;
1493 return n;
1496 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1497 if (n >= 0) {
1498 *flag = BP_MEM_WRITE;
1499 return n;
1502 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1503 if (n >= 0) {
1504 *flag = BP_MEM_READ;
1505 return n;
1508 return -1;
1511 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1512 target_ulong len, int type)
1514 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1515 return -ENOBUFS;
1518 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1519 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1521 switch (type) {
1522 case GDB_BREAKPOINT_HW:
1523 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1524 return -ENOBUFS;
1527 if (find_hw_breakpoint(addr, type) >= 0) {
1528 return -EEXIST;
1531 nb_hw_breakpoint++;
1532 break;
1534 case GDB_WATCHPOINT_WRITE:
1535 case GDB_WATCHPOINT_READ:
1536 case GDB_WATCHPOINT_ACCESS:
1537 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1538 return -ENOBUFS;
1541 if (find_hw_breakpoint(addr, type) >= 0) {
1542 return -EEXIST;
1545 nb_hw_watchpoint++;
1546 break;
1548 default:
1549 return -ENOSYS;
1552 return 0;
1555 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1556 target_ulong len, int type)
1558 int n;
1560 n = find_hw_breakpoint(addr, type);
1561 if (n < 0) {
1562 return -ENOENT;
1565 switch (type) {
1566 case GDB_BREAKPOINT_HW:
1567 nb_hw_breakpoint--;
1568 break;
1570 case GDB_WATCHPOINT_WRITE:
1571 case GDB_WATCHPOINT_READ:
1572 case GDB_WATCHPOINT_ACCESS:
1573 nb_hw_watchpoint--;
1574 break;
1576 default:
1577 return -ENOSYS;
1579 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1581 return 0;
1584 void kvm_arch_remove_all_hw_breakpoints(void)
1586 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1589 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1591 int n;
1593 /* Software Breakpoint updates */
1594 if (kvm_sw_breakpoints_active(cs)) {
1595 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1598 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1599 <= ARRAY_SIZE(hw_debug_points));
1600 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1602 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1603 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1604 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1605 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1606 switch (hw_debug_points[n].type) {
1607 case GDB_BREAKPOINT_HW:
1608 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1609 break;
1610 case GDB_WATCHPOINT_WRITE:
1611 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1612 break;
1613 case GDB_WATCHPOINT_READ:
1614 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1615 break;
1616 case GDB_WATCHPOINT_ACCESS:
1617 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1618 KVMPPC_DEBUG_WATCH_READ;
1619 break;
1620 default:
1621 cpu_abort(cs, "Unsupported breakpoint type\n");
1623 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1628 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1630 CPUState *cs = CPU(cpu);
1631 CPUPPCState *env = &cpu->env;
1632 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1633 int handle = 0;
1634 int n;
1635 int flag = 0;
1637 if (cs->singlestep_enabled) {
1638 handle = 1;
1639 } else if (arch_info->status) {
1640 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1641 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1642 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1643 if (n >= 0) {
1644 handle = 1;
1646 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1647 KVMPPC_DEBUG_WATCH_WRITE)) {
1648 n = find_hw_watchpoint(arch_info->address, &flag);
1649 if (n >= 0) {
1650 handle = 1;
1651 cs->watchpoint_hit = &hw_watchpoint;
1652 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1653 hw_watchpoint.flags = flag;
1657 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1658 handle = 1;
1659 } else {
1660 /* QEMU is not able to handle this debug exception, so inject
1661 * a program exception into the guest;
1662 * yes, a program exception, NOT a debug exception !!
1663 * When QEMU is using the debug resources then a debug exception
1664 * must always be raised. To achieve this we set MSR_DE and also
1665 * set MSRP_DEP so the guest cannot change MSR_DE.
1666 * When emulating debug resources for the guest, we instead want
1667 * the guest to control MSR_DE (enable/disable the debug interrupt
1668 * as needed). Supporting both configurations at once is NOT
1669 * possible, so we cannot share debug resources between QEMU and
1670 * the guest on the BOOKE architecture.
1671 * In the current design QEMU gets priority over the guest:
1672 * if QEMU is using the debug resources then the guest cannot
1673 * use them.
1674 * For software breakpoints QEMU uses a privileged instruction,
1675 * so there is no way we can be here because the guest raised a
1676 * debug exception; the only possibility is that the guest
1677 * executed a privileged / illegal instruction, which is why we
1678 * inject a program interrupt.
1681 cpu_synchronize_state(cs);
1682 /* env->nip is PC, so increment this by 4 to use
1683 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1685 env->nip += 4;
1686 cs->exception_index = POWERPC_EXCP_PROGRAM;
1687 env->error_code = POWERPC_EXCP_INVAL;
1688 ppc_cpu_do_interrupt(cs);
1691 return handle;
1694 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1696 PowerPCCPU *cpu = POWERPC_CPU(cs);
1697 CPUPPCState *env = &cpu->env;
1698 int ret;
1700 qemu_mutex_lock_iothread();
1702 switch (run->exit_reason) {
1703 case KVM_EXIT_DCR:
1704 if (run->dcr.is_write) {
1705 DPRINTF("handle dcr write\n");
1706 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1707 } else {
1708 DPRINTF("handle dcr read\n");
1709 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1711 break;
1712 case KVM_EXIT_HLT:
1713 DPRINTF("handle halt\n");
1714 ret = kvmppc_handle_halt(cpu);
1715 break;
1716 #if defined(TARGET_PPC64)
1717 case KVM_EXIT_PAPR_HCALL:
1718 DPRINTF("handle PAPR hypercall\n");
1719 run->papr_hcall.ret = spapr_hypercall(cpu,
1720 run->papr_hcall.nr,
1721 run->papr_hcall.args);
1722 ret = 0;
1723 break;
1724 #endif
1725 case KVM_EXIT_EPR:
1726 DPRINTF("handle epr\n");
1727 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1728 ret = 0;
1729 break;
1730 case KVM_EXIT_WATCHDOG:
1731 DPRINTF("handle watchdog expiry\n");
1732 watchdog_perform_action();
1733 ret = 0;
1734 break;
1736 case KVM_EXIT_DEBUG:
1737 DPRINTF("handle debug exception\n");
1738 if (kvm_handle_debug(cpu, run)) {
1739 ret = EXCP_DEBUG;
1740 break;
1742 /* re-enter, this exception was guest-internal */
1743 ret = 0;
1744 break;
1746 default:
1747 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1748 ret = -1;
1749 break;
1752 qemu_mutex_unlock_iothread();
1753 return ret;
1756 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1758 CPUState *cs = CPU(cpu);
1759 uint32_t bits = tsr_bits;
1760 struct kvm_one_reg reg = {
1761 .id = KVM_REG_PPC_OR_TSR,
1762 .addr = (uintptr_t) &bits,
1765 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1768 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1771 CPUState *cs = CPU(cpu);
1772 uint32_t bits = tsr_bits;
1773 struct kvm_one_reg reg = {
1774 .id = KVM_REG_PPC_CLEAR_TSR,
1775 .addr = (uintptr_t) &bits,
1778 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1781 int kvmppc_set_tcr(PowerPCCPU *cpu)
1783 CPUState *cs = CPU(cpu);
1784 CPUPPCState *env = &cpu->env;
1785 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1787 struct kvm_one_reg reg = {
1788 .id = KVM_REG_PPC_TCR,
1789 .addr = (uintptr_t) &tcr,
1792 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1795 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1797 CPUState *cs = CPU(cpu);
1798 int ret;
1800 if (!kvm_enabled()) {
1801 return -1;
1804 if (!cap_ppc_watchdog) {
1805 printf("warning: KVM does not support watchdog\n");
1806 return -1;
1809 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1810 if (ret < 0) {
1811 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1812 __func__, strerror(-ret));
1813 return ret;
1816 return ret;
1819 static int read_cpuinfo(const char *field, char *value, int len)
1821 FILE *f;
1822 int ret = -1;
1823 int field_len = strlen(field);
1824 char line[512];
1826 f = fopen("/proc/cpuinfo", "r");
1827 if (!f) {
1828 return -1;
1831 do {
1832 if (!fgets(line, sizeof(line), f)) {
1833 break;
1835 if (!strncmp(line, field, field_len)) {
1836 pstrcpy(value, len, line);
1837 ret = 0;
1838 break;
1840 } while(*line);
1842 fclose(f);
1844 return ret;
1847 uint32_t kvmppc_get_tbfreq(void)
1849 char line[512];
1850 char *ns;
1851 uint32_t retval = NANOSECONDS_PER_SECOND;
1853 if (read_cpuinfo("timebase", line, sizeof(line))) {
1854 return retval;
1857 if (!(ns = strchr(line, ':'))) {
1858 return retval;
1861 ns++;
1863 return atoi(ns);
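/*
 * The timebase frequency comes from a /proc/cpuinfo line that on POWER
 * hosts typically looks like "timebase : 512000000"; everything after
 * the colon is handed to atoi(), falling back to 1 GHz if the field is
 * missing or malformed.
 */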
1866 bool kvmppc_get_host_serial(char **value)
1868 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1869 NULL);
1872 bool kvmppc_get_host_model(char **value)
1874 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1877 /* Try to find a device tree node for a CPU with clock-frequency property */
1878 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1880 struct dirent *dirp;
1881 DIR *dp;
1883 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1884 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1885 return -1;
1888 buf[0] = '\0';
1889 while ((dirp = readdir(dp)) != NULL) {
1890 FILE *f;
1891 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1892 dirp->d_name);
1893 f = fopen(buf, "r");
1894 if (f) {
1895 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1896 fclose(f);
1897 break;
1899 buf[0] = '\0';
1901 closedir(dp);
1902 if (buf[0] == '\0') {
1903 printf("Unknown host!\n");
1904 return -1;
1907 return 0;
1910 static uint64_t kvmppc_read_int_dt(const char *filename)
1912 union {
1913 uint32_t v32;
1914 uint64_t v64;
1915 } u;
1916 FILE *f;
1917 int len;
1919 f = fopen(filename, "rb");
1920 if (!f) {
1921 return -1;
1924 len = fread(&u, 1, sizeof(u), f);
1925 fclose(f);
1926 switch (len) {
1927 case 4:
1928 /* property is a 32-bit quantity */
1929 return be32_to_cpu(u.v32);
1930 case 8:
1931 return be64_to_cpu(u.v64);
1934 return 0;
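/*
 * Device tree properties are stored big-endian, hence the be32_to_cpu /
 * be64_to_cpu conversions above: a 4-byte property is a single u32 cell,
 * an 8-byte property a u64, and any other length is treated as unreadable.
 */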
1937 /* Read a CPU node property from the host device tree that's a single
1938 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1939 * (can't find or open the property, or doesn't understand the
1940 * format) */
1941 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1943 char buf[PATH_MAX], *tmp;
1944 uint64_t val;
1946 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1947 return -1;
1950 tmp = g_strdup_printf("%s/%s", buf, propname);
1951 val = kvmppc_read_int_dt(tmp);
1952 g_free(tmp);
1954 return val;
1957 uint64_t kvmppc_get_clockfreq(void)
1959 return kvmppc_read_int_cpu_dt("clock-frequency");
1962 uint32_t kvmppc_get_vmx(void)
1964 return kvmppc_read_int_cpu_dt("ibm,vmx");
1967 uint32_t kvmppc_get_dfp(void)
1969 return kvmppc_read_int_cpu_dt("ibm,dfp");
1972 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1974 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1975 CPUState *cs = CPU(cpu);
1977 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1978 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1979 return 0;
1982 return 1;
1985 int kvmppc_get_hasidle(CPUPPCState *env)
1987 struct kvm_ppc_pvinfo pvinfo;
1989 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1990 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1991 return 1;
1994 return 0;
1997 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1999 uint32_t *hc = (uint32_t*)buf;
2000 struct kvm_ppc_pvinfo pvinfo;
2002 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2003 memcpy(buf, pvinfo.hcall, buf_len);
2004 return 0;
2008 * Fallback to always fail hypercalls regardless of endianness:
2010 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2011 * li r3, -1
2012 * b .+8 (becomes nop in wrong endian)
2013 * bswap32(li r3, -1)
2016 hc[0] = cpu_to_be32(0x08000048);
2017 hc[1] = cpu_to_be32(0x3860ffff);
2018 hc[2] = cpu_to_be32(0x48000008);
2019 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2021 return 1;
2024 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2026 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2029 void kvmppc_enable_logical_ci_hcalls(void)
2032 * FIXME: it would be nice if we could detect the cases where
2033 * we're using a device which requires the in kernel
2034 * implementation of these hcalls, but the kernel lacks them and
2035 * produce a warning.
2037 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2038 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2041 void kvmppc_enable_set_mode_hcall(void)
2043 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2046 void kvmppc_enable_clear_ref_mod_hcalls(void)
2048 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2049 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2052 void kvmppc_set_papr(PowerPCCPU *cpu)
2054 CPUState *cs = CPU(cpu);
2055 int ret;
2057 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2058 if (ret) {
2059 error_report("This vCPU type or KVM version does not support PAPR");
2060 exit(1);
2063 /* Update the capability flag so we sync the right information
2064 * with kvm */
2065 cap_papr = 1;
2068 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2070 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2073 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2075 CPUState *cs = CPU(cpu);
2076 int ret;
2078 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2079 if (ret && mpic_proxy) {
2080 error_report("This KVM version does not support EPR");
2081 exit(1);
2085 int kvmppc_smt_threads(void)
2087 return cap_ppc_smt ? cap_ppc_smt : 1;
2090 #ifdef TARGET_PPC64
2091 off_t kvmppc_alloc_rma(void **rma)
2093 off_t size;
2094 int fd;
2095 struct kvm_allocate_rma ret;
2097 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2098 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2099 * not necessary on this hardware
2100 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2102 * FIXME: We should allow the user to force contiguous RMA
2103 * allocation in the cap_ppc_rma==1 case.
2105 if (cap_ppc_rma < 2) {
2106 return 0;
2109 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2110 if (fd < 0) {
2111 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2112 strerror(errno));
2113 return -1;
2116 size = MIN(ret.rma_size, 256ul << 20);
2118 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2119 if (*rma == MAP_FAILED) {
2120 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2121 return -1;
2124 return size;
2127 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2129 struct kvm_ppc_smmu_info info;
2130 long rampagesize, best_page_shift;
2131 int i;
2133 if (cap_ppc_rma >= 2) {
2134 return current_size;
2137 /* Find the largest hardware supported page size that's less than
2138 * or equal to the (logical) backing page size of guest RAM */
2139 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2140 rampagesize = qemu_getrampagesize();
2141 best_page_shift = 0;
2143 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2144 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2146 if (!sps->page_shift) {
2147 continue;
2150 if ((sps->page_shift > best_page_shift)
2151 && ((1UL << sps->page_shift) <= rampagesize)) {
2152 best_page_shift = sps->page_shift;
2156 return MIN(current_size,
2157 1ULL << (best_page_shift + hash_shift - 7));
2159 #endif
2161 bool kvmppc_spapr_use_multitce(void)
2163 return cap_spapr_multitce;
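/* Create an in-kernel TCE table for the given LIOBN, preferring the
 * 64-bit window capable KVM_CREATE_SPAPR_TCE_64 ioctl and falling back
 * to the legacy KVM_CREATE_SPAPR_TCE (32-bit window at bus offset 0
 * only). The table is mmap()ed into QEMU; its fd is returned in *pfd. */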
2166 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2167 uint64_t bus_offset, uint32_t nb_table,
2168 int *pfd, bool need_vfio)
2170 long len;
2171 int fd;
2172 void *table;
2174 /* Must set fd to -1 so we don't try to munmap when called to
2175 * destroy the table, which the upper layers -will- do
2177 *pfd = -1;
2178 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2179 return NULL;
2182 if (cap_spapr_tce_64) {
2183 struct kvm_create_spapr_tce_64 args = {
2184 .liobn = liobn,
2185 .page_shift = page_shift,
2186 .offset = bus_offset >> page_shift,
2187 .size = nb_table,
2188 .flags = 0
2190 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2191 if (fd < 0) {
2192 fprintf(stderr,
2193 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2194 liobn);
2195 return NULL;
2197 } else if (cap_spapr_tce) {
2198 uint64_t window_size = (uint64_t) nb_table << page_shift;
2199 struct kvm_create_spapr_tce args = {
2200 .liobn = liobn,
2201 .window_size = window_size,
2203 if ((window_size != args.window_size) || bus_offset) {
2204 return NULL;
2206 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2207 if (fd < 0) {
2208 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2209 liobn);
2210 return NULL;
2212 } else {
2213 return NULL;
2216 len = nb_table * sizeof(uint64_t);
2217 /* FIXME: round this up to page size */
2219 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2220 if (table == MAP_FAILED) {
2221 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2222 liobn);
2223 close(fd);
2224 return NULL;
2227 *pfd = fd;
2228 return table;
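/* Undo kvmppc_create_spapr_tce(): unmap the table and close its fd. */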
2231 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2233 long len;
2235 if (fd < 0) {
2236 return -1;
2239 len = nb_table * sizeof(uint64_t);
2240 if ((munmap(table, len) < 0) ||
2241 (close(fd) < 0)) {
2242 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2243 strerror(errno));
2244 /* Leak the table */
2247 return 0;
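/* Ask the kernel to allocate or reset the guest hash page table.
 * Returns the size shift actually used, 0 if QEMU itself should
 * allocate the htab, or a negative errno on failure. */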
2250 int kvmppc_reset_htab(int shift_hint)
2252 uint32_t shift = shift_hint;
2254 if (!kvm_enabled()) {
2255 /* Full emulation, tell caller to allocate htab itself */
2256 return 0;
2258 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2259 int ret;
2260 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2261 if (ret == -ENOTTY) {
2262 /* At least some versions of PR KVM advertise the
2263 * capability, but don't implement the ioctl(). Oops.
2264 * Return 0 so that we allocate the htab in qemu, as is
2265 * correct for PR. */
2266 return 0;
2267 } else if (ret < 0) {
2268 return ret;
2270 return shift;
2273 /* We have a kernel that predates the htab reset calls. For PR
2274 * KVM, we need to allocate the htab ourselves; an HV KVM of
2275 * this era has already allocated a fixed 16MB hash table. */
2276 if (kvmppc_is_pr(kvm_state)) {
2277 /* PR - tell caller to allocate htab */
2278 return 0;
2279 } else {
2280 /* HV - assume 16MB kernel allocated htab */
2281 return 24;
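/* Read the host's Processor Version Register. */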
2285 static inline uint32_t mfpvr(void)
2287 uint32_t pvr;
2289 asm ("mfpvr %0"
2290 : "=r"(pvr));
2291 return pvr;
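/* Set or clear the given feature bits in an instruction-flags word. */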
2294 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2296 if (on) {
2297 *word |= flags;
2298 } else {
2299 *word &= ~flags;
2303 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2305 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2306 uint32_t vmx = kvmppc_get_vmx();
2307 uint32_t dfp = kvmppc_get_dfp();
2308 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2309 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2311 /* Now fix up the class with information we can query from the host */
2312 pcc->pvr = mfpvr();
2314 if (vmx != -1) {
2315 /* Only override when we know what the host supports */
2316 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2317 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2319 if (dfp != -1) {
2320 /* Only override when we know what the host supports */
2321 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2324 if (dcache_size != -1) {
2325 pcc->l1_dcache_size = dcache_size;
2328 if (icache_size != -1) {
2329 pcc->l1_icache_size = icache_size;
2332 #if defined(TARGET_PPC64)
2333 pcc->radix_page_info = kvm_get_radix_page_info();
2334 #endif /* defined(TARGET_PPC64) */
2337 bool kvmppc_has_cap_epr(void)
2339 return cap_epr;
2342 bool kvmppc_has_cap_htab_fd(void)
2344 return cap_htab_fd;
2347 bool kvmppc_has_cap_fixup_hcalls(void)
2349 return cap_fixup_hcalls;
2352 bool kvmppc_has_cap_htm(void)
2354 return cap_htm;
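/* Walk up the class hierarchy until we find an abstract class, which is
 * the "family" class for the given PVR-specific CPU class. */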
2357 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2359 ObjectClass *oc = OBJECT_CLASS(pcc);
2361 while (oc && !object_class_is_abstract(oc)) {
2362 oc = object_class_get_parent(oc);
2364 assert(oc);
2366 return POWERPC_CPU_CLASS(oc);
2369 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2371 uint32_t host_pvr = mfpvr();
2372 PowerPCCPUClass *pvr_pcc;
2374 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2375 if (pvr_pcc == NULL) {
2376 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2379 return pvr_pcc;
2382 static int kvm_ppc_register_host_cpu_type(void)
2384 TypeInfo type_info = {
2385 .name = TYPE_HOST_POWERPC_CPU,
2386 .class_init = kvmppc_host_cpu_class_init,
2388 PowerPCCPUClass *pvr_pcc;
2389 DeviceClass *dc;
2390 int i;
2392 pvr_pcc = kvm_ppc_get_host_cpu_class();
2393 if (pvr_pcc == NULL) {
2394 return -1;
2396 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2397 type_register(&type_info);
2399 #if defined(TARGET_PPC64)
2400 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2401 type_info.parent = TYPE_SPAPR_CPU_CORE;
2402 type_info.instance_size = sizeof(sPAPRCPUCore);
2403 type_info.instance_init = NULL;
2404 type_info.class_init = spapr_cpu_core_class_init;
2405 type_info.class_data = (void *) "host";
2406 type_register(&type_info);
2407 g_free((void *)type_info.name);
2408 #endif
2411 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2412 * we want "POWER8" to be a "family" alias that points to the current
2413 * host CPU type, too)
2415 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2416 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2417 if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2418 ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2419 char *suffix;
2421 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2422 suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2423 if (suffix) {
2424 *suffix = 0;
2426 ppc_cpu_aliases[i].oc = oc;
2427 break;
2431 return 0;
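/* Bind an RTAS function name to a token so the call can be handled in
 * the kernel (KVM_PPC_RTAS_DEFINE_TOKEN). Returns -ENOENT when KVM has
 * no RTAS support. */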
2434 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2436 struct kvm_rtas_token_args args = {
2437 .token = token,
2440 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2441 return -ENOENT;
2444 strncpy(args.name, function, sizeof(args.name));
2446 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
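/* Get a file descriptor for streaming the guest hash table out of (or,
 * with 'write' set, back into) the kernel via KVM_PPC_GET_HTAB_FD. */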
2449 int kvmppc_get_htab_fd(bool write)
2451 struct kvm_get_htab_fd s = {
2452 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2453 .start_index = 0,
2456 if (!cap_htab_fd) {
2457 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2458 return -1;
2461 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
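/* Stream hash table chunks from the HTAB fd into the migration stream.
 * Each chunk is a kvm_get_htab_header followed by n_valid HPTEs; the
 * header fields are forwarded as big-endian values followed by the raw
 * HPTE data. Returns 1 once the fd reports no more data, or 0 when the
 * max_ns time budget runs out first. */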
2464 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2466 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2467 uint8_t buf[bufsize];
2468 ssize_t rc;
2470 do {
2471 rc = read(fd, buf, bufsize);
2472 if (rc < 0) {
2473 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2474 strerror(errno));
2475 return rc;
2476 } else if (rc) {
2477 uint8_t *buffer = buf;
2478 ssize_t n = rc;
2479 while (n) {
2480 struct kvm_get_htab_header *head =
2481 (struct kvm_get_htab_header *) buffer;
2482 size_t chunksize = sizeof(*head) +
2483 HASH_PTE_SIZE_64 * head->n_valid;
2485 qemu_put_be32(f, head->index);
2486 qemu_put_be16(f, head->n_valid);
2487 qemu_put_be16(f, head->n_invalid);
2488 qemu_put_buffer(f, (void *)(head + 1),
2489 HASH_PTE_SIZE_64 * head->n_valid);
2491 buffer += chunksize;
2492 n -= chunksize;
2495 } while ((rc != 0)
2496 && ((max_ns < 0)
2497 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2499 return (rc == 0) ? 1 : 0;
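/* Rebuild one chunk (header plus n_valid HPTEs taken from the migration
 * stream) and push it to the HTAB fd in a single write(). */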
2502 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2503 uint16_t n_valid, uint16_t n_invalid)
2505 struct kvm_get_htab_header *buf;
2506 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2507 ssize_t rc;
2509 buf = alloca(chunksize);
2510 buf->index = index;
2511 buf->n_valid = n_valid;
2512 buf->n_invalid = n_invalid;
2514 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2516 rc = write(fd, buf, chunksize);
2517 if (rc < 0) {
2518 fprintf(stderr, "Error writing KVM hash table: %s\n",
2519 strerror(errno));
2520 return rc;
2522 if (rc != chunksize) {
2523 /* We should never get a short write on a single chunk */
2524 fprintf(stderr, "Short write, restoring KVM hash table\n");
2525 return -1;
2527 return 0;
2530 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2532 return true;
2535 void kvm_arch_init_irq_routing(KVMState *s)
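/* Read n HPTEs starting at ptex through a temporary HTAB fd, copying
 * valid entries into hptes[] and zero-filling ranges the kernel reports
 * as invalid. */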
2539 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2541 struct kvm_get_htab_fd ghf = {
2542 .flags = 0,
2543 .start_index = ptex,
2545 int fd, rc;
2546 int i;
2548 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2549 if (fd < 0) {
2550 hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2553 i = 0;
2554 while (i < n) {
2555 struct kvm_get_htab_header *hdr;
2556 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2557 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2559 rc = read(fd, buf, sizeof(buf));
2560 if (rc < 0) {
2561 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2564 hdr = (struct kvm_get_htab_header *)buf;
2565 while ((i < n) && ((char *)hdr < (buf + rc))) {
2566 int invalid = hdr->n_invalid;
2568 if (hdr->index != (ptex + i)) {
2569 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2570 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2573 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2574 i += hdr->n_valid;
2576 if ((n - i) < invalid) {
2577 invalid = n - i;
2579 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2580 i += invalid;
2582 hdr = (struct kvm_get_htab_header *)
2583 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2587 close(fd);
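/* Update a single HPTE by writing a one-entry chunk (header with
 * n_valid = 1 followed by the two big-endian PTE doublewords) to the
 * HTAB fd. */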
2590 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2592 int fd, rc;
2593 struct kvm_get_htab_fd ghf;
2594 struct {
2595 struct kvm_get_htab_header hdr;
2596 uint64_t pte0;
2597 uint64_t pte1;
2598 } buf;
2600 ghf.flags = 0;
2601 ghf.start_index = 0; /* Ignored */
2602 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2603 if (fd < 0) {
2604 hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2607 buf.hdr.n_valid = 1;
2608 buf.hdr.n_invalid = 0;
2609 buf.hdr.index = ptex;
2610 buf.pte0 = cpu_to_be64(pte0);
2611 buf.pte1 = cpu_to_be64(pte1);
2613 rc = write(fd, &buf, sizeof(buf));
2614 if (rc != sizeof(buf)) {
2615 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2617 close(fd);
2620 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2621 uint64_t address, uint32_t data, PCIDevice *dev)
2623 return 0;
2626 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2627 int vector, PCIDevice *dev)
2629 return 0;
2632 int kvm_arch_release_virq_post(int virq)
2634 return 0;
2637 int kvm_arch_msi_data_to_gsi(uint32_t data)
2639 return data & 0xffff;
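/* Enable the in-kernel H_RANDOM implementation, provided the host kernel
 * exposes a hardware RNG (KVM_CAP_PPC_HWRNG). */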
2642 int kvmppc_enable_hwrng(void)
2644 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2645 return -1;
2648 return kvmppc_enable_hcall(kvm_state, H_RANDOM);