target/arm: Restore SPSEL to correct CONTROL register on exception return
[qemu/kevin.git] / target / ppc / kvm.c
blob171d3d8040db02051d21e889171d2da903eced19
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #if defined(TARGET_PPC64)
51 #include "hw/ppc/spapr_cpu_core.h"
52 #endif
53 #include "elf.h"
54 #include "sysemu/kvm_int.h"
/* Uncomment the next line to get debug output on stderr. */
/* #define DEBUG_KVM */

/*
 * DPRINTF(): printf-style debug logging to stderr.
 *
 * Without DEBUG_KVM the macro expands to an empty statement, so its
 * arguments are not evaluated.
 */
#ifndef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
66 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69 KVM_CAP_LAST_INFO
/*
 * Cached KVM capability state. Unless noted otherwise, every flag is
 * assigned in kvm_arch_init().
 */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;              /* not assigned by kvm_arch_init() */
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;               /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;

/* Read from KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu() */
static uint32_t debug_inst_opcode;
98 /* XXX We have a race condition where we actually have a level triggered
99 * interrupt, but the infrastructure can't expose that yet, so the guest
100 * takes but ignores it, goes to sleep and never gets notified that there's
101 * still an interrupt pending.
103 * As a quick workaround, let's just wake up again 20 ms after we injected
104 * an interrupt. That way we can assure that we're always reinjecting
105 * interrupts in case the guest swallowed them.
107 static QEMUTimer *idle_timer;
109 static void kvm_kick_cpu(void *opaque)
111 PowerPCCPU *cpu = opaque;
113 qemu_cpu_kick(CPU(cpu));
116 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
117 * should only be used for fallback tests - generally we should use
118 * explicit capabilities for the features we want, rather than
119 * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
122 /* Assume KVM-PR if the GET_PVINFO capability is available */
123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
126 static int kvm_ppc_register_host_cpu_type(void);
128 int kvm_arch_init(MachineState *ms, KVMState *s)
130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
136 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
137 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
138 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
139 cap_spapr_vfio = false;
140 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
141 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
142 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
143 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
144 /* Note: we don't set cap_papr here, because this capability is
145 * only activated after this by kvmppc_set_papr() */
146 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
147 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
148 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
149 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
150 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
151 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
152 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
154 * Note: setting it to false because there is not such capability
155 * in KVM at this moment.
157 * TODO: call kvm_vm_check_extension() with the right capability
158 * after the kernel starts implementing it.*/
159 cap_ppc_pvr_compat = false;
161 if (!cap_interrupt_level) {
162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163 "VM to stall at times!\n");
166 kvm_ppc_register_host_cpu_type();
168 return 0;
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
173 return 0;
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
178 CPUPPCState *cenv = &cpu->env;
179 CPUState *cs = CPU(cpu);
180 struct kvm_sregs sregs;
181 int ret;
183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184 /* What we're really trying to say is "if we're on BookE, we use
185 the native PVR for now". This is the only sane way to check
186 it though, so we potentially confuse users that they can run
187 BookE guests on BookS. Let's hope nobody dares enough :) */
188 return 0;
189 } else {
190 if (!cap_segstate) {
191 fprintf(stderr, "kvm error: missing PVR setting capability\n");
192 return -ENOSYS;
196 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197 if (ret) {
198 return ret;
201 sregs.pvr = cenv->spr[SPR_PVR];
202 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
210 struct kvm_book3e_206_tlb_params params = {};
211 struct kvm_config_tlb cfg = {};
212 unsigned int entries = 0;
213 int ret, i;
215 if (!kvm_enabled() ||
216 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217 return 0;
220 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
222 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223 params.tlb_sizes[i] = booke206_tlb_size(env, i);
224 params.tlb_ways[i] = booke206_tlb_ways(env, i);
225 entries += params.tlb_sizes[i];
228 assert(entries == env->nb_tlb);
229 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
231 env->tlb_dirty = true;
233 cfg.array = (uintptr_t)env->tlb.tlbm;
234 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235 cfg.params = (uintptr_t)&params;
236 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
238 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239 if (ret < 0) {
240 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241 __func__, strerror(-ret));
242 return ret;
245 env->kvm_sw_tlb = true;
246 return 0;
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252 struct kvm_ppc_smmu_info *info)
254 CPUPPCState *env = &cpu->env;
255 CPUState *cs = CPU(cpu);
257 memset(info, 0, sizeof(*info));
259 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
260 * need to "guess" what the supported page sizes are.
262 * For that to work we make a few assumptions:
264 * - Check whether we are running "PR" KVM which only supports 4K
265 * and 16M pages, but supports them regardless of the backing
266 * store characteritics. We also don't support 1T segments.
268 * This is safe as if HV KVM ever supports that capability or PR
269 * KVM grows supports for more page/segment sizes, those versions
270 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271 * will not hit this fallback
273 * - Else we are running HV KVM. This means we only support page
274 * sizes that fit in the backing store. Additionally we only
275 * advertize 64K pages if the processor is ARCH 2.06 and we assume
276 * P7 encodings for the SLB and hash table. Here too, we assume
277 * support for any newer processor will mean a kernel that
278 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
279 * this fallback.
281 if (kvmppc_is_pr(cs->kvm_state)) {
282 /* No flags */
283 info->flags = 0;
284 info->slb_size = 64;
286 /* Standard 4k base page size segment */
287 info->sps[0].page_shift = 12;
288 info->sps[0].slb_enc = 0;
289 info->sps[0].enc[0].page_shift = 12;
290 info->sps[0].enc[0].pte_enc = 0;
292 /* Standard 16M large page size segment */
293 info->sps[1].page_shift = 24;
294 info->sps[1].slb_enc = SLB_VSID_L;
295 info->sps[1].enc[0].page_shift = 24;
296 info->sps[1].enc[0].pte_enc = 0;
297 } else {
298 int i = 0;
300 /* HV KVM has backing store size restrictions */
301 info->flags = KVM_PPC_PAGE_SIZES_REAL;
303 if (env->mmu_model & POWERPC_MMU_1TSEG) {
304 info->flags |= KVM_PPC_1T_SEGMENTS;
307 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
308 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
309 info->slb_size = 32;
310 } else {
311 info->slb_size = 64;
314 /* Standard 4k base page size segment */
315 info->sps[i].page_shift = 12;
316 info->sps[i].slb_enc = 0;
317 info->sps[i].enc[0].page_shift = 12;
318 info->sps[i].enc[0].pte_enc = 0;
319 i++;
321 /* 64K on MMU 2.06 and later */
322 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
323 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
324 info->sps[i].page_shift = 16;
325 info->sps[i].slb_enc = 0x110;
326 info->sps[i].enc[0].page_shift = 16;
327 info->sps[i].enc[0].pte_enc = 1;
328 i++;
331 /* Standard 16M large page size segment */
332 info->sps[i].page_shift = 24;
333 info->sps[i].slb_enc = SLB_VSID_L;
334 info->sps[i].enc[0].page_shift = 24;
335 info->sps[i].enc[0].pte_enc = 0;
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
341 CPUState *cs = CPU(cpu);
342 int ret;
344 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346 if (ret == 0) {
347 return;
351 kvm_get_fallback_smmu_info(cpu, info);
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
356 KVMState *s = KVM_STATE(current_machine->accelerator);
357 struct ppc_radix_page_info *radix_page_info;
358 struct kvm_ppc_rmmu_info rmmu_info;
359 int i;
361 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362 return NULL;
364 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365 return NULL;
367 radix_page_info = g_malloc0(sizeof(*radix_page_info));
368 radix_page_info->count = 0;
369 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370 if (rmmu_info.ap_encodings[i]) {
371 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372 radix_page_info->count++;
375 return radix_page_info;
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379 bool radix, bool gtse,
380 uint64_t proc_tbl)
382 CPUState *cs = CPU(cpu);
383 int ret;
384 uint64_t flags = 0;
385 struct kvm_ppc_mmuv3_cfg cfg = {
386 .process_table = proc_tbl,
389 if (radix) {
390 flags |= KVM_PPC_MMUV3_RADIX;
392 if (gtse) {
393 flags |= KVM_PPC_MMUV3_GTSE;
395 cfg.flags = flags;
396 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397 switch (ret) {
398 case 0:
399 return H_SUCCESS;
400 case -EINVAL:
401 return H_PARAMETER;
402 case -ENODEV:
403 return H_NOT_AVAILABLE;
404 default:
405 return H_HARDWARE;
409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
411 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
412 return true;
415 return (1ul << shift) <= rampgsize;
418 static long max_cpu_page_size;
420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
422 static struct kvm_ppc_smmu_info smmu_info;
423 static bool has_smmu_info;
424 CPUPPCState *env = &cpu->env;
425 int iq, ik, jq, jk;
426 bool has_64k_pages = false;
428 /* We only handle page sizes for 64-bit server guests for now */
429 if (!(env->mmu_model & POWERPC_MMU_64)) {
430 return;
433 /* Collect MMU info from kernel if not already */
434 if (!has_smmu_info) {
435 kvm_get_smmu_info(cpu, &smmu_info);
436 has_smmu_info = true;
439 if (!max_cpu_page_size) {
440 max_cpu_page_size = qemu_getrampagesize();
443 /* Convert to QEMU form */
444 memset(&env->sps, 0, sizeof(env->sps));
446 /* If we have HV KVM, we need to forbid CI large pages if our
447 * host page size is smaller than 64K.
449 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
450 env->ci_large_pages = getpagesize() >= 0x10000;
454 * XXX This loop should be an entry wide AND of the capabilities that
455 * the selected CPU has with the capabilities that KVM supports.
457 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
458 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
459 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
461 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
462 ksps->page_shift)) {
463 continue;
465 qsps->page_shift = ksps->page_shift;
466 qsps->slb_enc = ksps->slb_enc;
467 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
468 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
469 ksps->enc[jk].page_shift)) {
470 continue;
472 if (ksps->enc[jk].page_shift == 16) {
473 has_64k_pages = true;
475 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
476 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
477 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
478 break;
481 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
482 break;
485 env->slb_nr = smmu_info.slb_size;
486 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
487 env->mmu_model &= ~POWERPC_MMU_1TSEG;
489 if (!has_64k_pages) {
490 env->mmu_model &= ~POWERPC_MMU_64K;
494 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
496 Object *mem_obj = object_resolve_path(obj_path, NULL);
497 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
498 long pagesize;
500 if (mempath) {
501 pagesize = qemu_mempath_getpagesize(mempath);
502 g_free(mempath);
503 } else {
504 pagesize = getpagesize();
507 return pagesize >= max_cpu_page_size;
510 #else /* defined (TARGET_PPC64) */
512 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
/* Without TARGET_PPC64 every memory backend page size is accepted. */
bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
{
    const bool ok = true;

    return ok;
}
521 #endif /* !defined (TARGET_PPC64) */
523 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
525 return POWERPC_CPU(cpu)->vcpu_id;
528 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
529 * book3s supports only 1 watchpoint, so array size
530 * of 4 is sufficient for now.
532 #define MAX_HW_BKPTS 4
534 static struct HWBreakpoint {
535 target_ulong addr;
536 int type;
537 } hw_debug_points[MAX_HW_BKPTS];
539 static CPUWatchpoint hw_watchpoint;
541 /* Default there is no breakpoint and watchpoint supported */
542 static int max_hw_breakpoint;
543 static int max_hw_watchpoint;
544 static int nb_hw_breakpoint;
545 static int nb_hw_watchpoint;
547 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
549 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
550 max_hw_breakpoint = 2;
551 max_hw_watchpoint = 2;
554 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
555 fprintf(stderr, "Error initializing h/w breakpoints\n");
556 return;
560 int kvm_arch_init_vcpu(CPUState *cs)
562 PowerPCCPU *cpu = POWERPC_CPU(cs);
563 CPUPPCState *cenv = &cpu->env;
564 int ret;
566 /* Gather server mmu info from KVM and update the CPU state */
567 kvm_fixup_page_sizes(cpu);
569 /* Synchronize sregs with kvm */
570 ret = kvm_arch_sync_sregs(cpu);
571 if (ret) {
572 if (ret == -EINVAL) {
573 error_report("Register sync failed... If you're using kvm-hv.ko,"
574 " only \"-cpu host\" is possible");
576 return ret;
579 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
581 switch (cenv->mmu_model) {
582 case POWERPC_MMU_BOOKE206:
583 /* This target supports access to KVM's guest TLB */
584 ret = kvm_booke206_tlb_init(cpu);
585 break;
586 case POWERPC_MMU_2_07:
587 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
588 /* KVM-HV has transactional memory on POWER8 also without the
589 * KVM_CAP_PPC_HTM extension, so enable it here instead as
590 * long as it's availble to userspace on the host. */
591 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
592 cap_htm = true;
595 break;
596 default:
597 break;
600 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
601 kvmppc_hw_debug_points_init(cenv);
603 return ret;
606 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
608 CPUPPCState *env = &cpu->env;
609 CPUState *cs = CPU(cpu);
610 struct kvm_dirty_tlb dirty_tlb;
611 unsigned char *bitmap;
612 int ret;
614 if (!env->kvm_sw_tlb) {
615 return;
618 bitmap = g_malloc((env->nb_tlb + 7) / 8);
619 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
621 dirty_tlb.bitmap = (uintptr_t)bitmap;
622 dirty_tlb.num_dirty = env->nb_tlb;
624 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
625 if (ret) {
626 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
627 __func__, strerror(-ret));
630 g_free(bitmap);
633 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
635 PowerPCCPU *cpu = POWERPC_CPU(cs);
636 CPUPPCState *env = &cpu->env;
637 union {
638 uint32_t u32;
639 uint64_t u64;
640 } val;
641 struct kvm_one_reg reg = {
642 .id = id,
643 .addr = (uintptr_t) &val,
645 int ret;
647 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
648 if (ret != 0) {
649 trace_kvm_failed_spr_get(spr, strerror(errno));
650 } else {
651 switch (id & KVM_REG_SIZE_MASK) {
652 case KVM_REG_SIZE_U32:
653 env->spr[spr] = val.u32;
654 break;
656 case KVM_REG_SIZE_U64:
657 env->spr[spr] = val.u64;
658 break;
660 default:
661 /* Don't handle this size yet */
662 abort();
667 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
669 PowerPCCPU *cpu = POWERPC_CPU(cs);
670 CPUPPCState *env = &cpu->env;
671 union {
672 uint32_t u32;
673 uint64_t u64;
674 } val;
675 struct kvm_one_reg reg = {
676 .id = id,
677 .addr = (uintptr_t) &val,
679 int ret;
681 switch (id & KVM_REG_SIZE_MASK) {
682 case KVM_REG_SIZE_U32:
683 val.u32 = env->spr[spr];
684 break;
686 case KVM_REG_SIZE_U64:
687 val.u64 = env->spr[spr];
688 break;
690 default:
691 /* Don't handle this size yet */
692 abort();
695 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
696 if (ret != 0) {
697 trace_kvm_failed_spr_set(spr, strerror(errno));
701 static int kvm_put_fp(CPUState *cs)
703 PowerPCCPU *cpu = POWERPC_CPU(cs);
704 CPUPPCState *env = &cpu->env;
705 struct kvm_one_reg reg;
706 int i;
707 int ret;
709 if (env->insns_flags & PPC_FLOAT) {
710 uint64_t fpscr = env->fpscr;
711 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
713 reg.id = KVM_REG_PPC_FPSCR;
714 reg.addr = (uintptr_t)&fpscr;
715 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
716 if (ret < 0) {
717 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
718 return ret;
721 for (i = 0; i < 32; i++) {
722 uint64_t vsr[2];
724 #ifdef HOST_WORDS_BIGENDIAN
725 vsr[0] = float64_val(env->fpr[i]);
726 vsr[1] = env->vsr[i];
727 #else
728 vsr[0] = env->vsr[i];
729 vsr[1] = float64_val(env->fpr[i]);
730 #endif
731 reg.addr = (uintptr_t) &vsr;
732 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
734 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
737 i, strerror(errno));
738 return ret;
743 if (env->insns_flags & PPC_ALTIVEC) {
744 reg.id = KVM_REG_PPC_VSCR;
745 reg.addr = (uintptr_t)&env->vscr;
746 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
747 if (ret < 0) {
748 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
749 return ret;
752 for (i = 0; i < 32; i++) {
753 reg.id = KVM_REG_PPC_VR(i);
754 reg.addr = (uintptr_t)&env->avr[i];
755 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
756 if (ret < 0) {
757 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
758 return ret;
763 return 0;
766 static int kvm_get_fp(CPUState *cs)
768 PowerPCCPU *cpu = POWERPC_CPU(cs);
769 CPUPPCState *env = &cpu->env;
770 struct kvm_one_reg reg;
771 int i;
772 int ret;
774 if (env->insns_flags & PPC_FLOAT) {
775 uint64_t fpscr;
776 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
778 reg.id = KVM_REG_PPC_FPSCR;
779 reg.addr = (uintptr_t)&fpscr;
780 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
781 if (ret < 0) {
782 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
783 return ret;
784 } else {
785 env->fpscr = fpscr;
788 for (i = 0; i < 32; i++) {
789 uint64_t vsr[2];
791 reg.addr = (uintptr_t) &vsr;
792 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
794 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
795 if (ret < 0) {
796 DPRINTF("Unable to get %s%d from KVM: %s\n",
797 vsx ? "VSR" : "FPR", i, strerror(errno));
798 return ret;
799 } else {
800 #ifdef HOST_WORDS_BIGENDIAN
801 env->fpr[i] = vsr[0];
802 if (vsx) {
803 env->vsr[i] = vsr[1];
805 #else
806 env->fpr[i] = vsr[1];
807 if (vsx) {
808 env->vsr[i] = vsr[0];
810 #endif
815 if (env->insns_flags & PPC_ALTIVEC) {
816 reg.id = KVM_REG_PPC_VSCR;
817 reg.addr = (uintptr_t)&env->vscr;
818 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
819 if (ret < 0) {
820 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
821 return ret;
824 for (i = 0; i < 32; i++) {
825 reg.id = KVM_REG_PPC_VR(i);
826 reg.addr = (uintptr_t)&env->avr[i];
827 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
828 if (ret < 0) {
829 DPRINTF("Unable to get VR%d from KVM: %s\n",
830 i, strerror(errno));
831 return ret;
836 return 0;
839 #if defined(TARGET_PPC64)
840 static int kvm_get_vpa(CPUState *cs)
842 PowerPCCPU *cpu = POWERPC_CPU(cs);
843 CPUPPCState *env = &cpu->env;
844 struct kvm_one_reg reg;
845 int ret;
847 reg.id = KVM_REG_PPC_VPA_ADDR;
848 reg.addr = (uintptr_t)&env->vpa_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
850 if (ret < 0) {
851 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
852 return ret;
855 assert((uintptr_t)&env->slb_shadow_size
856 == ((uintptr_t)&env->slb_shadow_addr + 8));
857 reg.id = KVM_REG_PPC_VPA_SLB;
858 reg.addr = (uintptr_t)&env->slb_shadow_addr;
859 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860 if (ret < 0) {
861 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
862 strerror(errno));
863 return ret;
866 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
867 reg.id = KVM_REG_PPC_VPA_DTL;
868 reg.addr = (uintptr_t)&env->dtl_addr;
869 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
870 if (ret < 0) {
871 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
872 strerror(errno));
873 return ret;
876 return 0;
879 static int kvm_put_vpa(CPUState *cs)
881 PowerPCCPU *cpu = POWERPC_CPU(cs);
882 CPUPPCState *env = &cpu->env;
883 struct kvm_one_reg reg;
884 int ret;
886 /* SLB shadow or DTL can't be registered unless a master VPA is
887 * registered. That means when restoring state, if a VPA *is*
888 * registered, we need to set that up first. If not, we need to
889 * deregister the others before deregistering the master VPA */
890 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
892 if (env->vpa_addr) {
893 reg.id = KVM_REG_PPC_VPA_ADDR;
894 reg.addr = (uintptr_t)&env->vpa_addr;
895 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
896 if (ret < 0) {
897 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
898 return ret;
902 assert((uintptr_t)&env->slb_shadow_size
903 == ((uintptr_t)&env->slb_shadow_addr + 8));
904 reg.id = KVM_REG_PPC_VPA_SLB;
905 reg.addr = (uintptr_t)&env->slb_shadow_addr;
906 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
907 if (ret < 0) {
908 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
909 return ret;
912 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
913 reg.id = KVM_REG_PPC_VPA_DTL;
914 reg.addr = (uintptr_t)&env->dtl_addr;
915 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
916 if (ret < 0) {
917 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
918 strerror(errno));
919 return ret;
922 if (!env->vpa_addr) {
923 reg.id = KVM_REG_PPC_VPA_ADDR;
924 reg.addr = (uintptr_t)&env->vpa_addr;
925 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
926 if (ret < 0) {
927 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
928 return ret;
932 return 0;
934 #endif /* TARGET_PPC64 */
936 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
938 CPUPPCState *env = &cpu->env;
939 struct kvm_sregs sregs;
940 int i;
942 sregs.pvr = env->spr[SPR_PVR];
944 if (cpu->vhyp) {
945 PPCVirtualHypervisorClass *vhc =
946 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
947 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
948 } else {
949 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
952 /* Sync SLB */
953 #ifdef TARGET_PPC64
954 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
955 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
956 if (env->slb[i].esid & SLB_ESID_V) {
957 sregs.u.s.ppc64.slb[i].slbe |= i;
959 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
961 #endif
963 /* Sync SRs */
964 for (i = 0; i < 16; i++) {
965 sregs.u.s.ppc32.sr[i] = env->sr[i];
968 /* Sync BATs */
969 for (i = 0; i < 8; i++) {
970 /* Beware. We have to swap upper and lower bits here */
971 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
972 | env->DBAT[1][i];
973 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
974 | env->IBAT[1][i];
977 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
980 int kvm_arch_put_registers(CPUState *cs, int level)
982 PowerPCCPU *cpu = POWERPC_CPU(cs);
983 CPUPPCState *env = &cpu->env;
984 struct kvm_regs regs;
985 int ret;
986 int i;
988 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
989 if (ret < 0) {
990 return ret;
993 regs.ctr = env->ctr;
994 regs.lr = env->lr;
995 regs.xer = cpu_read_xer(env);
996 regs.msr = env->msr;
997 regs.pc = env->nip;
999 regs.srr0 = env->spr[SPR_SRR0];
1000 regs.srr1 = env->spr[SPR_SRR1];
1002 regs.sprg0 = env->spr[SPR_SPRG0];
1003 regs.sprg1 = env->spr[SPR_SPRG1];
1004 regs.sprg2 = env->spr[SPR_SPRG2];
1005 regs.sprg3 = env->spr[SPR_SPRG3];
1006 regs.sprg4 = env->spr[SPR_SPRG4];
1007 regs.sprg5 = env->spr[SPR_SPRG5];
1008 regs.sprg6 = env->spr[SPR_SPRG6];
1009 regs.sprg7 = env->spr[SPR_SPRG7];
1011 regs.pid = env->spr[SPR_BOOKE_PID];
1013 for (i = 0;i < 32; i++)
1014 regs.gpr[i] = env->gpr[i];
1016 regs.cr = 0;
1017 for (i = 0; i < 8; i++) {
1018 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1021 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1022 if (ret < 0)
1023 return ret;
1025 kvm_put_fp(cs);
1027 if (env->tlb_dirty) {
1028 kvm_sw_tlb_put(cpu);
1029 env->tlb_dirty = false;
1032 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1033 ret = kvmppc_put_books_sregs(cpu);
1034 if (ret < 0) {
1035 return ret;
1039 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1040 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1043 if (cap_one_reg) {
1044 int i;
1046 /* We deliberately ignore errors here, for kernels which have
1047 * the ONE_REG calls, but don't support the specific
1048 * registers, there's a reasonable chance things will still
1049 * work, at least until we try to migrate. */
1050 for (i = 0; i < 1024; i++) {
1051 uint64_t id = env->spr_cb[i].one_reg_id;
1053 if (id != 0) {
1054 kvm_put_one_spr(cs, id, i);
1058 #ifdef TARGET_PPC64
1059 if (msr_ts) {
1060 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1063 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1068 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1069 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1070 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1071 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1072 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1073 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1074 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1075 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1078 if (cap_papr) {
1079 if (kvm_put_vpa(cs) < 0) {
1080 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1084 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1085 #endif /* TARGET_PPC64 */
1088 return ret;
1091 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1093 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1096 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1098 CPUPPCState *env = &cpu->env;
1099 struct kvm_sregs sregs;
1100 int ret;
1102 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1103 if (ret < 0) {
1104 return ret;
1107 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1108 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1109 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1110 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1111 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1112 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1113 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1114 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1115 env->spr[SPR_DECR] = sregs.u.e.dec;
1116 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1117 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1118 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1121 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1122 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1123 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1124 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1125 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1126 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1129 if (sregs.u.e.features & KVM_SREGS_E_64) {
1130 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1133 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1134 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1137 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1138 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1139 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1140 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1141 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1142 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1143 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1144 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1145 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1146 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1147 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1148 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1149 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1150 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1151 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1152 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1153 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1154 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1155 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1156 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1157 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1158 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1159 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1160 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1161 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1162 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1163 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1164 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1165 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1166 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1167 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1168 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1169 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1171 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1172 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1173 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1174 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1175 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1176 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1177 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1180 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1181 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1182 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1185 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1186 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1187 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1188 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1189 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1193 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1194 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1195 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1196 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1197 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1198 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1199 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1200 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1201 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1202 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1203 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1206 if (sregs.u.e.features & KVM_SREGS_EXP) {
1207 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1210 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1211 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1212 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1215 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1216 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1217 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1218 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1220 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1221 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1222 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1226 return 0;
1229 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1231 CPUPPCState *env = &cpu->env;
1232 struct kvm_sregs sregs;
1233 int ret;
1234 int i;
1236 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1237 if (ret < 0) {
1238 return ret;
1241 if (!cpu->vhyp) {
1242 ppc_store_sdr1(env, sregs.u.s.sdr1);
1245 /* Sync SLB */
1246 #ifdef TARGET_PPC64
1248 * The packed SLB array we get from KVM_GET_SREGS only contains
1249 * information about valid entries. So we flush our internal copy
1250 * to get rid of stale ones, then put all valid SLB entries back
1251 * in.
1253 memset(env->slb, 0, sizeof(env->slb));
1254 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1255 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1256 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1258 * Only restore valid entries
1260 if (rb & SLB_ESID_V) {
1261 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1264 #endif
1266 /* Sync SRs */
1267 for (i = 0; i < 16; i++) {
1268 env->sr[i] = sregs.u.s.ppc32.sr[i];
1271 /* Sync BATs */
1272 for (i = 0; i < 8; i++) {
1273 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1274 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1275 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1276 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1279 return 0;
1282 int kvm_arch_get_registers(CPUState *cs)
1284 PowerPCCPU *cpu = POWERPC_CPU(cs);
1285 CPUPPCState *env = &cpu->env;
1286 struct kvm_regs regs;
1287 uint32_t cr;
1288 int i, ret;
1290 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1291 if (ret < 0)
1292 return ret;
1294 cr = regs.cr;
1295 for (i = 7; i >= 0; i--) {
1296 env->crf[i] = cr & 15;
1297 cr >>= 4;
1300 env->ctr = regs.ctr;
1301 env->lr = regs.lr;
1302 cpu_write_xer(env, regs.xer);
1303 env->msr = regs.msr;
1304 env->nip = regs.pc;
1306 env->spr[SPR_SRR0] = regs.srr0;
1307 env->spr[SPR_SRR1] = regs.srr1;
1309 env->spr[SPR_SPRG0] = regs.sprg0;
1310 env->spr[SPR_SPRG1] = regs.sprg1;
1311 env->spr[SPR_SPRG2] = regs.sprg2;
1312 env->spr[SPR_SPRG3] = regs.sprg3;
1313 env->spr[SPR_SPRG4] = regs.sprg4;
1314 env->spr[SPR_SPRG5] = regs.sprg5;
1315 env->spr[SPR_SPRG6] = regs.sprg6;
1316 env->spr[SPR_SPRG7] = regs.sprg7;
1318 env->spr[SPR_BOOKE_PID] = regs.pid;
1320 for (i = 0;i < 32; i++)
1321 env->gpr[i] = regs.gpr[i];
1323 kvm_get_fp(cs);
1325 if (cap_booke_sregs) {
1326 ret = kvmppc_get_booke_sregs(cpu);
1327 if (ret < 0) {
1328 return ret;
1332 if (cap_segstate) {
1333 ret = kvmppc_get_books_sregs(cpu);
1334 if (ret < 0) {
1335 return ret;
1339 if (cap_hior) {
1340 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1343 if (cap_one_reg) {
1344 int i;
1346 /* We deliberately ignore errors here, for kernels which have
1347 * the ONE_REG calls, but don't support the specific
1348 * registers, there's a reasonable chance things will still
1349 * work, at least until we try to migrate. */
1350 for (i = 0; i < 1024; i++) {
1351 uint64_t id = env->spr_cb[i].one_reg_id;
1353 if (id != 0) {
1354 kvm_get_one_spr(cs, id, i);
1358 #ifdef TARGET_PPC64
1359 if (msr_ts) {
1360 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1363 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1368 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1370 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1371 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1372 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1373 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1374 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1375 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1378 if (cap_papr) {
1379 if (kvm_get_vpa(cs) < 0) {
1380 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1384 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1385 #endif
1388 return 0;
1391 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1393 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1395 if (irq != PPC_INTERRUPT_EXT) {
1396 return 0;
1399 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1400 return 0;
1403 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1405 return 0;
/* Pick the interrupt input pin constant matching the build target */
#ifdef TARGET_PPCEMB
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1416 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1418 PowerPCCPU *cpu = POWERPC_CPU(cs);
1419 CPUPPCState *env = &cpu->env;
1420 int r;
1421 unsigned irq;
1423 qemu_mutex_lock_iothread();
1425 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1426 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1427 if (!cap_interrupt_level &&
1428 run->ready_for_interrupt_injection &&
1429 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1430 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1432 /* For now KVM disregards the 'irq' argument. However, in the
1433 * future KVM could cache it in-kernel to avoid a heavyweight exit
1434 * when reading the UIC.
1436 irq = KVM_INTERRUPT_SET;
1438 DPRINTF("injected interrupt %d\n", irq);
1439 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1440 if (r < 0) {
1441 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1444 /* Always wake up soon in case the interrupt was level based */
1445 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1446 (NANOSECONDS_PER_SECOND / 50));
1449 /* We don't know if there are more interrupts pending after this. However,
1450 * the guest will return to userspace in the course of handling this one
1451 * anyways, so we will get a chance to deliver the rest. */
1453 qemu_mutex_unlock_iothread();
1456 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1458 return MEMTXATTRS_UNSPECIFIED;
1461 int kvm_arch_process_async_events(CPUState *cs)
1463 return cs->halted;
1466 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1468 CPUState *cs = CPU(cpu);
1469 CPUPPCState *env = &cpu->env;
1471 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1472 cs->halted = 1;
1473 cs->exception_index = EXCP_HLT;
1476 return 0;
1479 /* map dcr access to existing qemu dcr emulation */
1480 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1482 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1483 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1485 return 0;
1488 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1490 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1491 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1493 return 0;
1496 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1498 /* Mixed endian case is not handled */
1499 uint32_t sc = debug_inst_opcode;
1501 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1502 sizeof(sc), 0) ||
1503 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1504 return -EINVAL;
1507 return 0;
1510 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1512 uint32_t sc;
1514 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1515 sc != debug_inst_opcode ||
1516 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1517 sizeof(sc), 1)) {
1518 return -EINVAL;
1521 return 0;
1524 static int find_hw_breakpoint(target_ulong addr, int type)
1526 int n;
1528 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1529 <= ARRAY_SIZE(hw_debug_points));
1531 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1532 if (hw_debug_points[n].addr == addr &&
1533 hw_debug_points[n].type == type) {
1534 return n;
1538 return -1;
1541 static int find_hw_watchpoint(target_ulong addr, int *flag)
1543 int n;
1545 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1546 if (n >= 0) {
1547 *flag = BP_MEM_ACCESS;
1548 return n;
1551 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1552 if (n >= 0) {
1553 *flag = BP_MEM_WRITE;
1554 return n;
1557 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1558 if (n >= 0) {
1559 *flag = BP_MEM_READ;
1560 return n;
1563 return -1;
1566 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1567 target_ulong len, int type)
1569 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1570 return -ENOBUFS;
1573 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1574 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1576 switch (type) {
1577 case GDB_BREAKPOINT_HW:
1578 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1579 return -ENOBUFS;
1582 if (find_hw_breakpoint(addr, type) >= 0) {
1583 return -EEXIST;
1586 nb_hw_breakpoint++;
1587 break;
1589 case GDB_WATCHPOINT_WRITE:
1590 case GDB_WATCHPOINT_READ:
1591 case GDB_WATCHPOINT_ACCESS:
1592 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1593 return -ENOBUFS;
1596 if (find_hw_breakpoint(addr, type) >= 0) {
1597 return -EEXIST;
1600 nb_hw_watchpoint++;
1601 break;
1603 default:
1604 return -ENOSYS;
1607 return 0;
1610 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1611 target_ulong len, int type)
1613 int n;
1615 n = find_hw_breakpoint(addr, type);
1616 if (n < 0) {
1617 return -ENOENT;
1620 switch (type) {
1621 case GDB_BREAKPOINT_HW:
1622 nb_hw_breakpoint--;
1623 break;
1625 case GDB_WATCHPOINT_WRITE:
1626 case GDB_WATCHPOINT_READ:
1627 case GDB_WATCHPOINT_ACCESS:
1628 nb_hw_watchpoint--;
1629 break;
1631 default:
1632 return -ENOSYS;
1634 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1636 return 0;
1639 void kvm_arch_remove_all_hw_breakpoints(void)
1641 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1644 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1646 int n;
1648 /* Software Breakpoint updates */
1649 if (kvm_sw_breakpoints_active(cs)) {
1650 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1653 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1654 <= ARRAY_SIZE(hw_debug_points));
1655 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1657 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1658 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1659 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1660 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1661 switch (hw_debug_points[n].type) {
1662 case GDB_BREAKPOINT_HW:
1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1664 break;
1665 case GDB_WATCHPOINT_WRITE:
1666 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1667 break;
1668 case GDB_WATCHPOINT_READ:
1669 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1670 break;
1671 case GDB_WATCHPOINT_ACCESS:
1672 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1673 KVMPPC_DEBUG_WATCH_READ;
1674 break;
1675 default:
1676 cpu_abort(cs, "Unsupported breakpoint type\n");
1678 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1683 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1685 CPUState *cs = CPU(cpu);
1686 CPUPPCState *env = &cpu->env;
1687 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1688 int handle = 0;
1689 int n;
1690 int flag = 0;
1692 if (cs->singlestep_enabled) {
1693 handle = 1;
1694 } else if (arch_info->status) {
1695 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1696 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1697 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1698 if (n >= 0) {
1699 handle = 1;
1701 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1702 KVMPPC_DEBUG_WATCH_WRITE)) {
1703 n = find_hw_watchpoint(arch_info->address, &flag);
1704 if (n >= 0) {
1705 handle = 1;
1706 cs->watchpoint_hit = &hw_watchpoint;
1707 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1708 hw_watchpoint.flags = flag;
1712 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1713 handle = 1;
1714 } else {
1715 /* QEMU is not able to handle debug exception, so inject
1716 * program exception to guest;
1717 * Yes program exception NOT debug exception !!
1718 * When QEMU is using debug resources then debug exception must
1719 * be always set. To achieve this we set MSR_DE and also set
1720 * MSRP_DEP so guest cannot change MSR_DE.
1721 * When emulating debug resource for guest we want guest
1722 * to control MSR_DE (enable/disable debug interrupt on need).
1723 * Supporting both configurations are NOT possible.
1724 * So the result is that we cannot share debug resources
1725 * between QEMU and Guest on BOOKE architecture.
1726 * In the current design QEMU gets the priority over guest,
1727 * this means that if QEMU is using debug resources then guest
1728 * cannot use them;
1729 * For software breakpoint QEMU uses a privileged instruction;
1730 * So there cannot be any reason that we are here for guest
1731 * set debug exception, only possibility is guest executed a
1732 * privileged / illegal instruction and that's why we are
1733 * injecting a program interrupt.
1736 cpu_synchronize_state(cs);
1737 /* env->nip is PC, so increment this by 4 to use
1738 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1740 env->nip += 4;
1741 cs->exception_index = POWERPC_EXCP_PROGRAM;
1742 env->error_code = POWERPC_EXCP_INVAL;
1743 ppc_cpu_do_interrupt(cs);
1746 return handle;
1749 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1751 PowerPCCPU *cpu = POWERPC_CPU(cs);
1752 CPUPPCState *env = &cpu->env;
1753 int ret;
1755 qemu_mutex_lock_iothread();
1757 switch (run->exit_reason) {
1758 case KVM_EXIT_DCR:
1759 if (run->dcr.is_write) {
1760 DPRINTF("handle dcr write\n");
1761 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1762 } else {
1763 DPRINTF("handle dcr read\n");
1764 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1766 break;
1767 case KVM_EXIT_HLT:
1768 DPRINTF("handle halt\n");
1769 ret = kvmppc_handle_halt(cpu);
1770 break;
1771 #if defined(TARGET_PPC64)
1772 case KVM_EXIT_PAPR_HCALL:
1773 DPRINTF("handle PAPR hypercall\n");
1774 run->papr_hcall.ret = spapr_hypercall(cpu,
1775 run->papr_hcall.nr,
1776 run->papr_hcall.args);
1777 ret = 0;
1778 break;
1779 #endif
1780 case KVM_EXIT_EPR:
1781 DPRINTF("handle epr\n");
1782 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1783 ret = 0;
1784 break;
1785 case KVM_EXIT_WATCHDOG:
1786 DPRINTF("handle watchdog expiry\n");
1787 watchdog_perform_action();
1788 ret = 0;
1789 break;
1791 case KVM_EXIT_DEBUG:
1792 DPRINTF("handle debug exception\n");
1793 if (kvm_handle_debug(cpu, run)) {
1794 ret = EXCP_DEBUG;
1795 break;
1797 /* re-enter, this exception was guest-internal */
1798 ret = 0;
1799 break;
1801 default:
1802 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1803 ret = -1;
1804 break;
1807 qemu_mutex_unlock_iothread();
1808 return ret;
1811 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1813 CPUState *cs = CPU(cpu);
1814 uint32_t bits = tsr_bits;
1815 struct kvm_one_reg reg = {
1816 .id = KVM_REG_PPC_OR_TSR,
1817 .addr = (uintptr_t) &bits,
1820 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1823 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1826 CPUState *cs = CPU(cpu);
1827 uint32_t bits = tsr_bits;
1828 struct kvm_one_reg reg = {
1829 .id = KVM_REG_PPC_CLEAR_TSR,
1830 .addr = (uintptr_t) &bits,
1833 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1836 int kvmppc_set_tcr(PowerPCCPU *cpu)
1838 CPUState *cs = CPU(cpu);
1839 CPUPPCState *env = &cpu->env;
1840 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1842 struct kvm_one_reg reg = {
1843 .id = KVM_REG_PPC_TCR,
1844 .addr = (uintptr_t) &tcr,
1847 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1850 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1852 CPUState *cs = CPU(cpu);
1853 int ret;
1855 if (!kvm_enabled()) {
1856 return -1;
1859 if (!cap_ppc_watchdog) {
1860 printf("warning: KVM does not support watchdog");
1861 return -1;
1864 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1865 if (ret < 0) {
1866 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1867 __func__, strerror(-ret));
1868 return ret;
1871 return ret;
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (whole line, bounded by @len) into @value.
 *
 * Returns 0 when a line was found, -1 when the file cannot be opened or
 * no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int found = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (!fp) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* An empty buffer ends the scan as well */
        if (*line == '\0') {
            break;
        }
    }

    fclose(fp);

    return found;
}
1902 uint32_t kvmppc_get_tbfreq(void)
1904 char line[512];
1905 char *ns;
1906 uint32_t retval = NANOSECONDS_PER_SECOND;
1908 if (read_cpuinfo("timebase", line, sizeof(line))) {
1909 return retval;
1912 if (!(ns = strchr(line, ':'))) {
1913 return retval;
1916 ns++;
1918 return atoi(ns);
1921 bool kvmppc_get_host_serial(char **value)
1923 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1924 NULL);
1927 bool kvmppc_get_host_model(char **value)
1929 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1932 /* Try to find a device tree node for a CPU with clock-frequency property */
1933 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1935 struct dirent *dirp;
1936 DIR *dp;
1938 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1939 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1940 return -1;
1943 buf[0] = '\0';
1944 while ((dirp = readdir(dp)) != NULL) {
1945 FILE *f;
1946 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1947 dirp->d_name);
1948 f = fopen(buf, "r");
1949 if (f) {
1950 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1951 fclose(f);
1952 break;
1954 buf[0] = '\0';
1956 closedir(dp);
1957 if (buf[0] == '\0') {
1958 printf("Unknown host!\n");
1959 return -1;
1962 return 0;
/*
 * Read a big-endian integer property from the device tree file @filename.
 *
 * Returns the value for a 4-byte or 8-byte file, (uint64_t)-1 when the
 * file cannot be opened, and 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    FILE *fp;
    int nread;

    fp = fopen(filename, "rb");
    if (!fp) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1992 /* Read a CPU node property from the host device tree that's a single
1993 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1994 * (can't find or open the property, or doesn't understand the
1995 * format) */
1996 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1998 char buf[PATH_MAX], *tmp;
1999 uint64_t val;
2001 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2002 return -1;
2005 tmp = g_strdup_printf("%s/%s", buf, propname);
2006 val = kvmppc_read_int_dt(tmp);
2007 g_free(tmp);
2009 return val;
/* Returns the host CPU node's "clock-frequency" device tree property */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Returns the host CPU node's "ibm,vmx" property, truncated to 32 bits */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
/* Returns the host CPU node's "ibm,dfp" property, truncated to 32 bits */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
2027 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2029 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2030 CPUState *cs = CPU(cpu);
2032 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2033 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2034 return 0;
2037 return 1;
2040 int kvmppc_get_hasidle(CPUPPCState *env)
2042 struct kvm_ppc_pvinfo pvinfo;
2044 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2045 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2046 return 1;
2049 return 0;
2052 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2054 uint32_t *hc = (uint32_t*)buf;
2055 struct kvm_ppc_pvinfo pvinfo;
2057 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2058 memcpy(buf, pvinfo.hcall, buf_len);
2059 return 0;
2063 * Fallback to always fail hypercalls regardless of endianness:
2065 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2066 * li r3, -1
2067 * b .+8 (becomes nop in wrong endian)
2068 * bswap32(li r3, -1)
2071 hc[0] = cpu_to_be32(0x08000048);
2072 hc[1] = cpu_to_be32(0x3860ffff);
2073 hc[2] = cpu_to_be32(0x48000008);
2074 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2076 return 1;
2079 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2081 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2084 void kvmppc_enable_logical_ci_hcalls(void)
2087 * FIXME: it would be nice if we could detect the cases where
2088 * we're using a device which requires the in kernel
2089 * implementation of these hcalls, but the kernel lacks them and
2090 * produce a warning.
2092 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2093 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2096 void kvmppc_enable_set_mode_hcall(void)
2098 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2101 void kvmppc_enable_clear_ref_mod_hcalls(void)
2103 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2104 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2107 void kvmppc_set_papr(PowerPCCPU *cpu)
2109 CPUState *cs = CPU(cpu);
2110 int ret;
2112 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2113 if (ret) {
2114 error_report("This vCPU type or KVM version does not support PAPR");
2115 exit(1);
2118 /* Update the capability flag so we sync the right information
2119 * with kvm */
2120 cap_papr = 1;
2123 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2125 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2128 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2130 CPUState *cs = CPU(cpu);
2131 int ret;
2133 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2134 if (ret && mpic_proxy) {
2135 error_report("This KVM version does not support EPR");
2136 exit(1);
2140 int kvmppc_smt_threads(void)
2142 return cap_ppc_smt ? cap_ppc_smt : 1;
2145 int kvmppc_set_smt_threads(int smt)
2147 int ret;
2149 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2150 if (!ret) {
2151 cap_ppc_smt = smt;
2153 return ret;
2156 void kvmppc_hint_smt_possible(Error **errp)
2158 int i;
2159 GString *g;
2160 char *s;
2162 assert(kvm_enabled());
2163 if (cap_ppc_smt_possible) {
2164 g = g_string_new("Available VSMT modes:");
2165 for (i = 63; i >= 0; i--) {
2166 if ((1UL << i) & cap_ppc_smt_possible) {
2167 g_string_append_printf(g, " %lu", (1UL << i));
2170 s = g_string_free(g, false);
2171 error_append_hint(errp, "%s.\n", s);
2172 g_free(s);
2173 } else {
2174 error_append_hint(errp,
2175 "This KVM seems to be too old to support VSMT.\n");
2180 #ifdef TARGET_PPC64
2181 off_t kvmppc_alloc_rma(void **rma)
2183 off_t size;
2184 int fd;
2185 struct kvm_allocate_rma ret;
2187 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2188 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2189 * not necessary on this hardware
2190 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2192 * FIXME: We should allow the user to force contiguous RMA
2193 * allocation in the cap_ppc_rma==1 case.
2195 if (cap_ppc_rma < 2) {
2196 return 0;
2199 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2200 if (fd < 0) {
2201 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2202 strerror(errno));
2203 return -1;
2206 size = MIN(ret.rma_size, 256ul << 20);
2208 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2209 if (*rma == MAP_FAILED) {
2210 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2211 return -1;
2214 return size;
2217 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2219 struct kvm_ppc_smmu_info info;
2220 long rampagesize, best_page_shift;
2221 int i;
2223 if (cap_ppc_rma >= 2) {
2224 return current_size;
2227 /* Find the largest hardware supported page size that's less than
2228 * or equal to the (logical) backing page size of guest RAM */
2229 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2230 rampagesize = qemu_getrampagesize();
2231 best_page_shift = 0;
2233 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2234 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2236 if (!sps->page_shift) {
2237 continue;
2240 if ((sps->page_shift > best_page_shift)
2241 && ((1UL << sps->page_shift) <= rampagesize)) {
2242 best_page_shift = sps->page_shift;
2246 return MIN(current_size,
2247 1ULL << (best_page_shift + hash_shift - 7));
2249 #endif
2251 bool kvmppc_spapr_use_multitce(void)
2253 return cap_spapr_multitce;
2256 int kvmppc_spapr_enable_inkernel_multitce(void)
2258 int ret;
2260 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2261 H_PUT_TCE_INDIRECT, 1);
2262 if (!ret) {
2263 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2264 H_STUFF_TCE, 1);
2267 return ret;
2270 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2271 uint64_t bus_offset, uint32_t nb_table,
2272 int *pfd, bool need_vfio)
2274 long len;
2275 int fd;
2276 void *table;
2278 /* Must set fd to -1 so we don't try to munmap when called for
2279 * destroying the table, which the upper layers -will- do
2281 *pfd = -1;
2282 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2283 return NULL;
2286 if (cap_spapr_tce_64) {
2287 struct kvm_create_spapr_tce_64 args = {
2288 .liobn = liobn,
2289 .page_shift = page_shift,
2290 .offset = bus_offset >> page_shift,
2291 .size = nb_table,
2292 .flags = 0
2294 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2295 if (fd < 0) {
2296 fprintf(stderr,
2297 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2298 liobn);
2299 return NULL;
2301 } else if (cap_spapr_tce) {
2302 uint64_t window_size = (uint64_t) nb_table << page_shift;
2303 struct kvm_create_spapr_tce args = {
2304 .liobn = liobn,
2305 .window_size = window_size,
2307 if ((window_size != args.window_size) || bus_offset) {
2308 return NULL;
2310 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2311 if (fd < 0) {
2312 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2313 liobn);
2314 return NULL;
2316 } else {
2317 return NULL;
2320 len = nb_table * sizeof(uint64_t);
2321 /* FIXME: round this up to page size */
2323 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2324 if (table == MAP_FAILED) {
2325 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2326 liobn);
2327 close(fd);
2328 return NULL;
2331 *pfd = fd;
2332 return table;
/*
 * Unmap a TCE table created by kvmppc_create_spapr_tce() and close its
 * file descriptor.
 *
 * Returns -1 when @fd is negative (nothing was created in the kernel),
 * 0 otherwise.  A munmap()/close() failure is only reported on stderr.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Newline added so the message is a complete line on stderr */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2354 int kvmppc_reset_htab(int shift_hint)
2356 uint32_t shift = shift_hint;
2358 if (!kvm_enabled()) {
2359 /* Full emulation, tell caller to allocate htab itself */
2360 return 0;
2362 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2363 int ret;
2364 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2365 if (ret == -ENOTTY) {
2366 /* At least some versions of PR KVM advertise the
2367 * capability, but don't implement the ioctl(). Oops.
2368 * Return 0 so that we allocate the htab in qemu, as is
2369 * correct for PR. */
2370 return 0;
2371 } else if (ret < 0) {
2372 return ret;
2374 return shift;
2377 /* We have a kernel that predates the htab reset calls. For PR
2378 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2379 * this era, it has allocated a 16MB fixed size hash table already. */
2380 if (kvmppc_is_pr(kvm_state)) {
2381 /* PR - tell caller to allocate htab */
2382 return 0;
2383 } else {
2384 /* HV - assume 16MB kernel allocated htab */
2385 return 24;
2389 static inline uint32_t mfpvr(void)
2391 uint32_t pvr;
2393 asm ("mfpvr %0"
2394 : "=r"(pvr));
2395 return pvr;
/* Set (@on true) or clear (@on false) the bits of @flags in *@word */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2407 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2409 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2410 uint32_t vmx = kvmppc_get_vmx();
2411 uint32_t dfp = kvmppc_get_dfp();
2412 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2413 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2415 /* Now fix up the class with information we can query from the host */
2416 pcc->pvr = mfpvr();
2418 if (vmx != -1) {
2419 /* Only override when we know what the host supports */
2420 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2421 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2423 if (dfp != -1) {
2424 /* Only override when we know what the host supports */
2425 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2428 if (dcache_size != -1) {
2429 pcc->l1_dcache_size = dcache_size;
2432 if (icache_size != -1) {
2433 pcc->l1_icache_size = icache_size;
2436 #if defined(TARGET_PPC64)
2437 pcc->radix_page_info = kvm_get_radix_page_info();
2439 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2441 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2442 * compliant. More importantly, advertising ISA 3.00
2443 * architected mode may prevent guests from activating
2444 * necessary DD1 workarounds.
2446 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2447 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2449 #endif /* defined(TARGET_PPC64) */
2452 bool kvmppc_has_cap_epr(void)
2454 return cap_epr;
2457 bool kvmppc_has_cap_fixup_hcalls(void)
2459 return cap_fixup_hcalls;
2462 bool kvmppc_has_cap_htm(void)
2464 return cap_htm;
2467 bool kvmppc_has_cap_mmu_radix(void)
2469 return cap_mmu_radix;
2472 bool kvmppc_has_cap_mmu_hash_v3(void)
2474 return cap_mmu_hash_v3;
2477 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2479 uint32_t host_pvr = mfpvr();
2480 PowerPCCPUClass *pvr_pcc;
2482 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2483 if (pvr_pcc == NULL) {
2484 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2487 return pvr_pcc;
2490 static int kvm_ppc_register_host_cpu_type(void)
2492 TypeInfo type_info = {
2493 .name = TYPE_HOST_POWERPC_CPU,
2494 .class_init = kvmppc_host_cpu_class_init,
2496 PowerPCCPUClass *pvr_pcc;
2497 ObjectClass *oc;
2498 DeviceClass *dc;
2499 int i;
2501 pvr_pcc = kvm_ppc_get_host_cpu_class();
2502 if (pvr_pcc == NULL) {
2503 return -1;
2505 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2506 type_register(&type_info);
2508 oc = object_class_by_name(type_info.name);
2509 g_assert(oc);
2511 #if defined(TARGET_PPC64)
2512 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2513 type_info.parent = TYPE_SPAPR_CPU_CORE,
2514 type_info.instance_size = sizeof(sPAPRCPUCore);
2515 type_info.instance_init = NULL;
2516 type_info.class_init = spapr_cpu_core_class_init;
2517 type_info.class_data = (void *) "host";
2518 type_register(&type_info);
2519 g_free((void *)type_info.name);
2520 #endif
2523 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2524 * we want "POWER8" to be a "family" alias that points to the current
2525 * host CPU type, too)
2527 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2528 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2529 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2530 char *suffix;
2532 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2533 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2534 if (suffix) {
2535 *suffix = 0;
2537 break;
2541 return 0;
2544 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2546 struct kvm_rtas_token_args args = {
2547 .token = token,
2550 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2551 return -ENOENT;
2554 strncpy(args.name, function, sizeof(args.name));
2556 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2559 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2561 struct kvm_get_htab_fd s = {
2562 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2563 .start_index = index,
2565 int ret;
2567 if (!cap_htab_fd) {
2568 error_setg(errp, "KVM version doesn't support %s the HPT",
2569 write ? "writing" : "reading");
2570 return -ENOTSUP;
2573 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2574 if (ret < 0) {
2575 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2576 write ? "writing" : "reading", write ? "to" : "from",
2577 strerror(errno));
2578 return -errno;
2581 return ret;
2584 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2586 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2587 uint8_t buf[bufsize];
2588 ssize_t rc;
2590 do {
2591 rc = read(fd, buf, bufsize);
2592 if (rc < 0) {
2593 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2594 strerror(errno));
2595 return rc;
2596 } else if (rc) {
2597 uint8_t *buffer = buf;
2598 ssize_t n = rc;
2599 while (n) {
2600 struct kvm_get_htab_header *head =
2601 (struct kvm_get_htab_header *) buffer;
2602 size_t chunksize = sizeof(*head) +
2603 HASH_PTE_SIZE_64 * head->n_valid;
2605 qemu_put_be32(f, head->index);
2606 qemu_put_be16(f, head->n_valid);
2607 qemu_put_be16(f, head->n_invalid);
2608 qemu_put_buffer(f, (void *)(head + 1),
2609 HASH_PTE_SIZE_64 * head->n_valid);
2611 buffer += chunksize;
2612 n -= chunksize;
2615 } while ((rc != 0)
2616 && ((max_ns < 0)
2617 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2619 return (rc == 0) ? 1 : 0;
2622 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2623 uint16_t n_valid, uint16_t n_invalid)
2625 struct kvm_get_htab_header *buf;
2626 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2627 ssize_t rc;
2629 buf = alloca(chunksize);
2630 buf->index = index;
2631 buf->n_valid = n_valid;
2632 buf->n_invalid = n_invalid;
2634 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2636 rc = write(fd, buf, chunksize);
2637 if (rc < 0) {
2638 fprintf(stderr, "Error writing KVM hash table: %s\n",
2639 strerror(errno));
2640 return rc;
2642 if (rc != chunksize) {
2643 /* We should never get a short write on a single chunk */
2644 fprintf(stderr, "Short write, restoring KVM hash table\n");
2645 return -1;
2647 return 0;
2650 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2652 return true;
2655 void kvm_arch_init_irq_routing(KVMState *s)
2659 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2661 int fd, rc;
2662 int i;
2664 fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2666 i = 0;
2667 while (i < n) {
2668 struct kvm_get_htab_header *hdr;
2669 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2670 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2672 rc = read(fd, buf, sizeof(buf));
2673 if (rc < 0) {
2674 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2677 hdr = (struct kvm_get_htab_header *)buf;
2678 while ((i < n) && ((char *)hdr < (buf + rc))) {
2679 int invalid = hdr->n_invalid;
2681 if (hdr->index != (ptex + i)) {
2682 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2683 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2686 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2687 i += hdr->n_valid;
2689 if ((n - i) < invalid) {
2690 invalid = n - i;
2692 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2693 i += hdr->n_invalid;
2695 hdr = (struct kvm_get_htab_header *)
2696 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2700 close(fd);
2703 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2705 int fd, rc;
2706 struct {
2707 struct kvm_get_htab_header hdr;
2708 uint64_t pte0;
2709 uint64_t pte1;
2710 } buf;
2712 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2714 buf.hdr.n_valid = 1;
2715 buf.hdr.n_invalid = 0;
2716 buf.hdr.index = ptex;
2717 buf.pte0 = cpu_to_be64(pte0);
2718 buf.pte1 = cpu_to_be64(pte1);
2720 rc = write(fd, &buf, sizeof(buf));
2721 if (rc != sizeof(buf)) {
2722 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2724 close(fd);
2727 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2728 uint64_t address, uint32_t data, PCIDevice *dev)
2730 return 0;
2733 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2734 int vector, PCIDevice *dev)
2736 return 0;
/* Does nothing with @virq and always returns 0. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;

    return data & low16_mask;
}
2749 int kvmppc_enable_hwrng(void)
2751 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2752 return -1;
2755 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2758 void kvmppc_check_papr_resize_hpt(Error **errp)
2760 if (!kvm_enabled()) {
2761 return; /* No KVM, we're good */
2764 if (cap_resize_hpt) {
2765 return; /* Kernel has explicit support, we're good */
2768 /* Otherwise fallback on looking for PR KVM */
2769 if (kvmppc_is_pr(kvm_state)) {
2770 return;
2773 error_setg(errp,
2774 "Hash page table resizing not available with this KVM version");
2777 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2779 CPUState *cs = CPU(cpu);
2780 struct kvm_ppc_resize_hpt rhpt = {
2781 .flags = flags,
2782 .shift = shift,
2785 if (!cap_resize_hpt) {
2786 return -ENOSYS;
2789 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2792 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2794 CPUState *cs = CPU(cpu);
2795 struct kvm_ppc_resize_hpt rhpt = {
2796 .flags = flags,
2797 .shift = shift,
2800 if (!cap_resize_hpt) {
2801 return -ENOSYS;
2804 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2808 * This is a helper function to detect a post migration scenario
2809 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2810 * the guest kernel can't handle a PVR value other than the actual host
2811 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2813 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2814 * (so, we're HV), return true. The workaround itself is done in
2815 * cpu_post_load.
2817 * The order here is important: we'll only check for KVM PR as a
2818 * fallback if the guest kernel can't handle the situation itself.
2819 * We need to avoid as much as possible querying the running KVM type
2820 * in QEMU level.
2822 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2824 CPUState *cs = CPU(cpu);
2826 if (!kvm_enabled()) {
2827 return false;
2830 if (cap_ppc_pvr_compat) {
2831 return false;
2834 return !kvmppc_is_pr(cs->kvm_state);