ppc/kvm: check some capabilities with kvm_vm_check_extension()
[qemu/armbru.git] / target / ppc / kvm.c
blob208c70e81426386f7272835ba334adeab2675bb7
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #if defined(TARGET_PPC64)
51 #include "hw/ppc/spapr_cpu_core.h"
52 #endif
53 #include "elf.h"
54 #include "sysemu/kvm_int.h"
56 //#define DEBUG_KVM
58 #ifdef DEBUG_KVM
59 #define DPRINTF(fmt, ...) \
60 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
61 #else
62 #define DPRINTF(fmt, ...) \
63 do { } while (0)
64 #endif
66 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69 KVM_CAP_LAST_INFO
/*
 * Cached results of the KVM capability probes.  Most of them are filled in
 * by kvm_arch_init(); cap_papr is deliberately not set there, and cap_htm
 * may additionally be switched on in kvm_arch_init_vcpu().
 */

/* Interrupt delivery */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;

/* Register state access */
static int cap_segstate;
static int cap_booke_sregs;

/* SMT */
static int cap_ppc_smt;
static int cap_ppc_smt_possible;

/* sPAPR memory and TCE handling */
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;

/* Miscellaneous */
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;

/* Filled from KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu(). */
static uint32_t debug_inst_opcode;
98 /* XXX We have a race condition where we actually have a level triggered
99 * interrupt, but the infrastructure can't expose that yet, so the guest
100 * takes but ignores it, goes to sleep and never gets notified that there's
101 * still an interrupt pending.
103 * As a quick workaround, let's just wake up again 20 ms after we injected
104 * an interrupt. That way we can assure that we're always reinjecting
105 * interrupts in case the guest swallowed them.
107 static QEMUTimer *idle_timer;
109 static void kvm_kick_cpu(void *opaque)
111 PowerPCCPU *cpu = opaque;
113 qemu_cpu_kick(CPU(cpu));
116 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
117 * should only be used for fallback tests - generally we should use
118 * explicit capabilities for the features we want, rather than
119 * assuming what is/isn't available depending on the KVM variant. */
120 static bool kvmppc_is_pr(KVMState *ks)
122 /* Assume KVM-PR if the GET_PVINFO capability is available */
123 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
126 static int kvm_ppc_register_host_cpu_type(void);
128 int kvm_arch_init(MachineState *ms, KVMState *s)
130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
136 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
137 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
138 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
139 cap_spapr_vfio = false;
140 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
141 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
142 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
143 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
144 /* Note: we don't set cap_papr here, because this capability is
145 * only activated after this by kvmppc_set_papr() */
146 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
147 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
148 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
149 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
150 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
151 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
152 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
154 * Note: setting it to false because there is not such capability
155 * in KVM at this moment.
157 * TODO: call kvm_vm_check_extension() with the right capability
158 * after the kernel starts implementing it.*/
159 cap_ppc_pvr_compat = false;
161 if (!cap_interrupt_level) {
162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163 "VM to stall at times!\n");
166 kvm_ppc_register_host_cpu_type();
168 return 0;
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
173 return 0;
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
178 CPUPPCState *cenv = &cpu->env;
179 CPUState *cs = CPU(cpu);
180 struct kvm_sregs sregs;
181 int ret;
183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184 /* What we're really trying to say is "if we're on BookE, we use
185 the native PVR for now". This is the only sane way to check
186 it though, so we potentially confuse users that they can run
187 BookE guests on BookS. Let's hope nobody dares enough :) */
188 return 0;
189 } else {
190 if (!cap_segstate) {
191 fprintf(stderr, "kvm error: missing PVR setting capability\n");
192 return -ENOSYS;
196 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197 if (ret) {
198 return ret;
201 sregs.pvr = cenv->spr[SPR_PVR];
202 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
210 struct kvm_book3e_206_tlb_params params = {};
211 struct kvm_config_tlb cfg = {};
212 unsigned int entries = 0;
213 int ret, i;
215 if (!kvm_enabled() ||
216 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217 return 0;
220 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
222 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223 params.tlb_sizes[i] = booke206_tlb_size(env, i);
224 params.tlb_ways[i] = booke206_tlb_ways(env, i);
225 entries += params.tlb_sizes[i];
228 assert(entries == env->nb_tlb);
229 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
231 env->tlb_dirty = true;
233 cfg.array = (uintptr_t)env->tlb.tlbm;
234 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235 cfg.params = (uintptr_t)&params;
236 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
238 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239 if (ret < 0) {
240 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241 __func__, strerror(-ret));
242 return ret;
245 env->kvm_sw_tlb = true;
246 return 0;
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252 struct kvm_ppc_smmu_info *info)
254 CPUPPCState *env = &cpu->env;
255 CPUState *cs = CPU(cpu);
257 memset(info, 0, sizeof(*info));
259 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
260 * need to "guess" what the supported page sizes are.
262 * For that to work we make a few assumptions:
264 * - Check whether we are running "PR" KVM which only supports 4K
265 * and 16M pages, but supports them regardless of the backing
266 * store characteritics. We also don't support 1T segments.
268 * This is safe as if HV KVM ever supports that capability or PR
269 * KVM grows supports for more page/segment sizes, those versions
270 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271 * will not hit this fallback
273 * - Else we are running HV KVM. This means we only support page
274 * sizes that fit in the backing store. Additionally we only
275 * advertize 64K pages if the processor is ARCH 2.06 and we assume
276 * P7 encodings for the SLB and hash table. Here too, we assume
277 * support for any newer processor will mean a kernel that
278 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
279 * this fallback.
281 if (kvmppc_is_pr(cs->kvm_state)) {
282 /* No flags */
283 info->flags = 0;
284 info->slb_size = 64;
286 /* Standard 4k base page size segment */
287 info->sps[0].page_shift = 12;
288 info->sps[0].slb_enc = 0;
289 info->sps[0].enc[0].page_shift = 12;
290 info->sps[0].enc[0].pte_enc = 0;
292 /* Standard 16M large page size segment */
293 info->sps[1].page_shift = 24;
294 info->sps[1].slb_enc = SLB_VSID_L;
295 info->sps[1].enc[0].page_shift = 24;
296 info->sps[1].enc[0].pte_enc = 0;
297 } else {
298 int i = 0;
300 /* HV KVM has backing store size restrictions */
301 info->flags = KVM_PPC_PAGE_SIZES_REAL;
303 if (env->mmu_model & POWERPC_MMU_1TSEG) {
304 info->flags |= KVM_PPC_1T_SEGMENTS;
307 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
308 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
309 info->slb_size = 32;
310 } else {
311 info->slb_size = 64;
314 /* Standard 4k base page size segment */
315 info->sps[i].page_shift = 12;
316 info->sps[i].slb_enc = 0;
317 info->sps[i].enc[0].page_shift = 12;
318 info->sps[i].enc[0].pte_enc = 0;
319 i++;
321 /* 64K on MMU 2.06 and later */
322 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
323 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
324 info->sps[i].page_shift = 16;
325 info->sps[i].slb_enc = 0x110;
326 info->sps[i].enc[0].page_shift = 16;
327 info->sps[i].enc[0].pte_enc = 1;
328 i++;
331 /* Standard 16M large page size segment */
332 info->sps[i].page_shift = 24;
333 info->sps[i].slb_enc = SLB_VSID_L;
334 info->sps[i].enc[0].page_shift = 24;
335 info->sps[i].enc[0].pte_enc = 0;
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
341 CPUState *cs = CPU(cpu);
342 int ret;
344 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346 if (ret == 0) {
347 return;
351 kvm_get_fallback_smmu_info(cpu, info);
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
356 KVMState *s = KVM_STATE(current_machine->accelerator);
357 struct ppc_radix_page_info *radix_page_info;
358 struct kvm_ppc_rmmu_info rmmu_info;
359 int i;
361 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362 return NULL;
364 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365 return NULL;
367 radix_page_info = g_malloc0(sizeof(*radix_page_info));
368 radix_page_info->count = 0;
369 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370 if (rmmu_info.ap_encodings[i]) {
371 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372 radix_page_info->count++;
375 return radix_page_info;
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379 bool radix, bool gtse,
380 uint64_t proc_tbl)
382 CPUState *cs = CPU(cpu);
383 int ret;
384 uint64_t flags = 0;
385 struct kvm_ppc_mmuv3_cfg cfg = {
386 .process_table = proc_tbl,
389 if (radix) {
390 flags |= KVM_PPC_MMUV3_RADIX;
392 if (gtse) {
393 flags |= KVM_PPC_MMUV3_GTSE;
395 cfg.flags = flags;
396 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397 switch (ret) {
398 case 0:
399 return H_SUCCESS;
400 case -EINVAL:
401 return H_PARAMETER;
402 case -ENODEV:
403 return H_NOT_AVAILABLE;
404 default:
405 return H_HARDWARE;
/*
 * Decide whether a page size of (1 << @shift) bytes may be offered.
 *
 * @flags:     SMMU info flags; without KVM_PPC_PAGE_SIZES_REAL every page
 *             size is acceptable.
 * @rampgsize: page size of the memory backing guest RAM, in bytes.
 * @shift:     log2 of the page size being checked.
 *
 * Returns true when the page size is acceptable, i.e. when it does not
 * exceed @rampgsize or when there is no backing-store restriction.
 */
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
        return true;
    }

    /* Shifting by the full width of the type is undefined behaviour; a
     * page that large can never fit in the backing store anyway. */
    if (shift >= 64) {
        return false;
    }

    /* The original compared in unsigned arithmetic as well; the cast only
     * makes that conversion explicit. */
    return (1ULL << shift) <= (unsigned long long)(unsigned long)rampgsize;
}
418 static long max_cpu_page_size;
420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
422 static struct kvm_ppc_smmu_info smmu_info;
423 static bool has_smmu_info;
424 CPUPPCState *env = &cpu->env;
425 int iq, ik, jq, jk;
426 bool has_64k_pages = false;
428 /* We only handle page sizes for 64-bit server guests for now */
429 if (!(env->mmu_model & POWERPC_MMU_64)) {
430 return;
433 /* Collect MMU info from kernel if not already */
434 if (!has_smmu_info) {
435 kvm_get_smmu_info(cpu, &smmu_info);
436 has_smmu_info = true;
439 if (!max_cpu_page_size) {
440 max_cpu_page_size = qemu_getrampagesize();
443 /* Convert to QEMU form */
444 memset(&env->sps, 0, sizeof(env->sps));
446 /* If we have HV KVM, we need to forbid CI large pages if our
447 * host page size is smaller than 64K.
449 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
450 env->ci_large_pages = getpagesize() >= 0x10000;
454 * XXX This loop should be an entry wide AND of the capabilities that
455 * the selected CPU has with the capabilities that KVM supports.
457 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
458 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
459 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
461 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
462 ksps->page_shift)) {
463 continue;
465 qsps->page_shift = ksps->page_shift;
466 qsps->slb_enc = ksps->slb_enc;
467 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
468 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
469 ksps->enc[jk].page_shift)) {
470 continue;
472 if (ksps->enc[jk].page_shift == 16) {
473 has_64k_pages = true;
475 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
476 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
477 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
478 break;
481 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
482 break;
485 env->slb_nr = smmu_info.slb_size;
486 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
487 env->mmu_model &= ~POWERPC_MMU_1TSEG;
489 if (!has_64k_pages) {
490 env->mmu_model &= ~POWERPC_MMU_64K;
494 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
496 Object *mem_obj = object_resolve_path(obj_path, NULL);
497 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
498 long pagesize;
500 if (mempath) {
501 pagesize = qemu_mempath_getpagesize(mempath);
502 g_free(mempath);
503 } else {
504 pagesize = getpagesize();
507 return pagesize >= max_cpu_page_size;
510 #else /* defined (TARGET_PPC64) */
512 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
516 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
518 return true;
521 #endif /* !defined (TARGET_PPC64) */
523 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
525 return POWERPC_CPU(cpu)->vcpu_id;
528 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
529 * book3s supports only 1 watchpoint, so array size
530 * of 4 is sufficient for now.
532 #define MAX_HW_BKPTS 4
534 static struct HWBreakpoint {
535 target_ulong addr;
536 int type;
537 } hw_debug_points[MAX_HW_BKPTS];
539 static CPUWatchpoint hw_watchpoint;
541 /* Default there is no breakpoint and watchpoint supported */
542 static int max_hw_breakpoint;
543 static int max_hw_watchpoint;
544 static int nb_hw_breakpoint;
545 static int nb_hw_watchpoint;
547 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
549 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
550 max_hw_breakpoint = 2;
551 max_hw_watchpoint = 2;
554 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
555 fprintf(stderr, "Error initializing h/w breakpoints\n");
556 return;
560 int kvm_arch_init_vcpu(CPUState *cs)
562 PowerPCCPU *cpu = POWERPC_CPU(cs);
563 CPUPPCState *cenv = &cpu->env;
564 int ret;
566 /* Gather server mmu info from KVM and update the CPU state */
567 kvm_fixup_page_sizes(cpu);
569 /* Synchronize sregs with kvm */
570 ret = kvm_arch_sync_sregs(cpu);
571 if (ret) {
572 if (ret == -EINVAL) {
573 error_report("Register sync failed... If you're using kvm-hv.ko,"
574 " only \"-cpu host\" is possible");
576 return ret;
579 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
581 switch (cenv->mmu_model) {
582 case POWERPC_MMU_BOOKE206:
583 /* This target supports access to KVM's guest TLB */
584 ret = kvm_booke206_tlb_init(cpu);
585 break;
586 case POWERPC_MMU_2_07:
587 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
588 /* KVM-HV has transactional memory on POWER8 also without the
589 * KVM_CAP_PPC_HTM extension, so enable it here instead as
590 * long as it's availble to userspace on the host. */
591 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
592 cap_htm = true;
595 break;
596 default:
597 break;
600 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
601 kvmppc_hw_debug_points_init(cenv);
603 return ret;
606 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
608 CPUPPCState *env = &cpu->env;
609 CPUState *cs = CPU(cpu);
610 struct kvm_dirty_tlb dirty_tlb;
611 unsigned char *bitmap;
612 int ret;
614 if (!env->kvm_sw_tlb) {
615 return;
618 bitmap = g_malloc((env->nb_tlb + 7) / 8);
619 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
621 dirty_tlb.bitmap = (uintptr_t)bitmap;
622 dirty_tlb.num_dirty = env->nb_tlb;
624 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
625 if (ret) {
626 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
627 __func__, strerror(-ret));
630 g_free(bitmap);
633 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
635 PowerPCCPU *cpu = POWERPC_CPU(cs);
636 CPUPPCState *env = &cpu->env;
637 union {
638 uint32_t u32;
639 uint64_t u64;
640 } val;
641 struct kvm_one_reg reg = {
642 .id = id,
643 .addr = (uintptr_t) &val,
645 int ret;
647 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
648 if (ret != 0) {
649 trace_kvm_failed_spr_get(spr, strerror(errno));
650 } else {
651 switch (id & KVM_REG_SIZE_MASK) {
652 case KVM_REG_SIZE_U32:
653 env->spr[spr] = val.u32;
654 break;
656 case KVM_REG_SIZE_U64:
657 env->spr[spr] = val.u64;
658 break;
660 default:
661 /* Don't handle this size yet */
662 abort();
667 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
669 PowerPCCPU *cpu = POWERPC_CPU(cs);
670 CPUPPCState *env = &cpu->env;
671 union {
672 uint32_t u32;
673 uint64_t u64;
674 } val;
675 struct kvm_one_reg reg = {
676 .id = id,
677 .addr = (uintptr_t) &val,
679 int ret;
681 switch (id & KVM_REG_SIZE_MASK) {
682 case KVM_REG_SIZE_U32:
683 val.u32 = env->spr[spr];
684 break;
686 case KVM_REG_SIZE_U64:
687 val.u64 = env->spr[spr];
688 break;
690 default:
691 /* Don't handle this size yet */
692 abort();
695 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
696 if (ret != 0) {
697 trace_kvm_failed_spr_set(spr, strerror(errno));
701 static int kvm_put_fp(CPUState *cs)
703 PowerPCCPU *cpu = POWERPC_CPU(cs);
704 CPUPPCState *env = &cpu->env;
705 struct kvm_one_reg reg;
706 int i;
707 int ret;
709 if (env->insns_flags & PPC_FLOAT) {
710 uint64_t fpscr = env->fpscr;
711 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
713 reg.id = KVM_REG_PPC_FPSCR;
714 reg.addr = (uintptr_t)&fpscr;
715 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
716 if (ret < 0) {
717 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
718 return ret;
721 for (i = 0; i < 32; i++) {
722 uint64_t vsr[2];
724 #ifdef HOST_WORDS_BIGENDIAN
725 vsr[0] = float64_val(env->fpr[i]);
726 vsr[1] = env->vsr[i];
727 #else
728 vsr[0] = env->vsr[i];
729 vsr[1] = float64_val(env->fpr[i]);
730 #endif
731 reg.addr = (uintptr_t) &vsr;
732 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
734 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
737 i, strerror(errno));
738 return ret;
743 if (env->insns_flags & PPC_ALTIVEC) {
744 reg.id = KVM_REG_PPC_VSCR;
745 reg.addr = (uintptr_t)&env->vscr;
746 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
747 if (ret < 0) {
748 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
749 return ret;
752 for (i = 0; i < 32; i++) {
753 reg.id = KVM_REG_PPC_VR(i);
754 reg.addr = (uintptr_t)&env->avr[i];
755 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
756 if (ret < 0) {
757 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
758 return ret;
763 return 0;
766 static int kvm_get_fp(CPUState *cs)
768 PowerPCCPU *cpu = POWERPC_CPU(cs);
769 CPUPPCState *env = &cpu->env;
770 struct kvm_one_reg reg;
771 int i;
772 int ret;
774 if (env->insns_flags & PPC_FLOAT) {
775 uint64_t fpscr;
776 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
778 reg.id = KVM_REG_PPC_FPSCR;
779 reg.addr = (uintptr_t)&fpscr;
780 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
781 if (ret < 0) {
782 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
783 return ret;
784 } else {
785 env->fpscr = fpscr;
788 for (i = 0; i < 32; i++) {
789 uint64_t vsr[2];
791 reg.addr = (uintptr_t) &vsr;
792 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
794 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
795 if (ret < 0) {
796 DPRINTF("Unable to get %s%d from KVM: %s\n",
797 vsx ? "VSR" : "FPR", i, strerror(errno));
798 return ret;
799 } else {
800 #ifdef HOST_WORDS_BIGENDIAN
801 env->fpr[i] = vsr[0];
802 if (vsx) {
803 env->vsr[i] = vsr[1];
805 #else
806 env->fpr[i] = vsr[1];
807 if (vsx) {
808 env->vsr[i] = vsr[0];
810 #endif
815 if (env->insns_flags & PPC_ALTIVEC) {
816 reg.id = KVM_REG_PPC_VSCR;
817 reg.addr = (uintptr_t)&env->vscr;
818 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
819 if (ret < 0) {
820 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
821 return ret;
824 for (i = 0; i < 32; i++) {
825 reg.id = KVM_REG_PPC_VR(i);
826 reg.addr = (uintptr_t)&env->avr[i];
827 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
828 if (ret < 0) {
829 DPRINTF("Unable to get VR%d from KVM: %s\n",
830 i, strerror(errno));
831 return ret;
836 return 0;
839 #if defined(TARGET_PPC64)
840 static int kvm_get_vpa(CPUState *cs)
842 PowerPCCPU *cpu = POWERPC_CPU(cs);
843 CPUPPCState *env = &cpu->env;
844 struct kvm_one_reg reg;
845 int ret;
847 reg.id = KVM_REG_PPC_VPA_ADDR;
848 reg.addr = (uintptr_t)&env->vpa_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
850 if (ret < 0) {
851 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
852 return ret;
855 assert((uintptr_t)&env->slb_shadow_size
856 == ((uintptr_t)&env->slb_shadow_addr + 8));
857 reg.id = KVM_REG_PPC_VPA_SLB;
858 reg.addr = (uintptr_t)&env->slb_shadow_addr;
859 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860 if (ret < 0) {
861 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
862 strerror(errno));
863 return ret;
866 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
867 reg.id = KVM_REG_PPC_VPA_DTL;
868 reg.addr = (uintptr_t)&env->dtl_addr;
869 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
870 if (ret < 0) {
871 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
872 strerror(errno));
873 return ret;
876 return 0;
879 static int kvm_put_vpa(CPUState *cs)
881 PowerPCCPU *cpu = POWERPC_CPU(cs);
882 CPUPPCState *env = &cpu->env;
883 struct kvm_one_reg reg;
884 int ret;
886 /* SLB shadow or DTL can't be registered unless a master VPA is
887 * registered. That means when restoring state, if a VPA *is*
888 * registered, we need to set that up first. If not, we need to
889 * deregister the others before deregistering the master VPA */
890 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
892 if (env->vpa_addr) {
893 reg.id = KVM_REG_PPC_VPA_ADDR;
894 reg.addr = (uintptr_t)&env->vpa_addr;
895 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
896 if (ret < 0) {
897 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
898 return ret;
902 assert((uintptr_t)&env->slb_shadow_size
903 == ((uintptr_t)&env->slb_shadow_addr + 8));
904 reg.id = KVM_REG_PPC_VPA_SLB;
905 reg.addr = (uintptr_t)&env->slb_shadow_addr;
906 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
907 if (ret < 0) {
908 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
909 return ret;
912 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
913 reg.id = KVM_REG_PPC_VPA_DTL;
914 reg.addr = (uintptr_t)&env->dtl_addr;
915 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
916 if (ret < 0) {
917 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
918 strerror(errno));
919 return ret;
922 if (!env->vpa_addr) {
923 reg.id = KVM_REG_PPC_VPA_ADDR;
924 reg.addr = (uintptr_t)&env->vpa_addr;
925 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
926 if (ret < 0) {
927 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
928 return ret;
932 return 0;
934 #endif /* TARGET_PPC64 */
936 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
938 CPUPPCState *env = &cpu->env;
939 struct kvm_sregs sregs;
940 int i;
942 sregs.pvr = env->spr[SPR_PVR];
944 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
946 /* Sync SLB */
947 #ifdef TARGET_PPC64
948 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
949 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
950 if (env->slb[i].esid & SLB_ESID_V) {
951 sregs.u.s.ppc64.slb[i].slbe |= i;
953 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
955 #endif
957 /* Sync SRs */
958 for (i = 0; i < 16; i++) {
959 sregs.u.s.ppc32.sr[i] = env->sr[i];
962 /* Sync BATs */
963 for (i = 0; i < 8; i++) {
964 /* Beware. We have to swap upper and lower bits here */
965 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
966 | env->DBAT[1][i];
967 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
968 | env->IBAT[1][i];
971 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
974 int kvm_arch_put_registers(CPUState *cs, int level)
976 PowerPCCPU *cpu = POWERPC_CPU(cs);
977 CPUPPCState *env = &cpu->env;
978 struct kvm_regs regs;
979 int ret;
980 int i;
982 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
983 if (ret < 0) {
984 return ret;
987 regs.ctr = env->ctr;
988 regs.lr = env->lr;
989 regs.xer = cpu_read_xer(env);
990 regs.msr = env->msr;
991 regs.pc = env->nip;
993 regs.srr0 = env->spr[SPR_SRR0];
994 regs.srr1 = env->spr[SPR_SRR1];
996 regs.sprg0 = env->spr[SPR_SPRG0];
997 regs.sprg1 = env->spr[SPR_SPRG1];
998 regs.sprg2 = env->spr[SPR_SPRG2];
999 regs.sprg3 = env->spr[SPR_SPRG3];
1000 regs.sprg4 = env->spr[SPR_SPRG4];
1001 regs.sprg5 = env->spr[SPR_SPRG5];
1002 regs.sprg6 = env->spr[SPR_SPRG6];
1003 regs.sprg7 = env->spr[SPR_SPRG7];
1005 regs.pid = env->spr[SPR_BOOKE_PID];
1007 for (i = 0;i < 32; i++)
1008 regs.gpr[i] = env->gpr[i];
1010 regs.cr = 0;
1011 for (i = 0; i < 8; i++) {
1012 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1015 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1016 if (ret < 0)
1017 return ret;
1019 kvm_put_fp(cs);
1021 if (env->tlb_dirty) {
1022 kvm_sw_tlb_put(cpu);
1023 env->tlb_dirty = false;
1026 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1027 ret = kvmppc_put_books_sregs(cpu);
1028 if (ret < 0) {
1029 return ret;
1033 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1034 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1037 if (cap_one_reg) {
1038 int i;
1040 /* We deliberately ignore errors here, for kernels which have
1041 * the ONE_REG calls, but don't support the specific
1042 * registers, there's a reasonable chance things will still
1043 * work, at least until we try to migrate. */
1044 for (i = 0; i < 1024; i++) {
1045 uint64_t id = env->spr_cb[i].one_reg_id;
1047 if (id != 0) {
1048 kvm_put_one_spr(cs, id, i);
1052 #ifdef TARGET_PPC64
1053 if (msr_ts) {
1054 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1057 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1068 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1069 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1072 if (cap_papr) {
1073 if (kvm_put_vpa(cs) < 0) {
1074 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1078 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1079 #endif /* TARGET_PPC64 */
1082 return ret;
1085 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1087 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1090 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1092 CPUPPCState *env = &cpu->env;
1093 struct kvm_sregs sregs;
1094 int ret;
1096 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1097 if (ret < 0) {
1098 return ret;
1101 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1102 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1103 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1104 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1105 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1106 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1107 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1108 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1109 env->spr[SPR_DECR] = sregs.u.e.dec;
1110 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1111 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1112 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1115 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1116 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1117 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1118 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1119 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1120 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1123 if (sregs.u.e.features & KVM_SREGS_E_64) {
1124 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1127 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1128 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1131 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1132 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1133 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1134 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1135 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1136 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1137 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1138 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1139 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1140 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1141 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1142 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1143 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1144 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1145 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1146 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1147 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1148 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1149 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1150 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1151 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1152 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1153 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1154 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1155 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1156 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1157 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1158 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1159 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1160 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1161 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1162 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1163 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1165 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1166 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1167 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1168 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1169 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1170 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1171 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1174 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1175 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1176 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1179 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1180 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1181 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1182 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1183 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1187 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1188 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1189 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1190 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1191 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1192 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1193 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1194 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1195 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1196 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1197 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1200 if (sregs.u.e.features & KVM_SREGS_EXP) {
1201 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1204 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1205 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1206 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1209 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1210 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1211 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1212 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1214 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1215 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1216 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1220 return 0;
1223 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1225 CPUPPCState *env = &cpu->env;
1226 struct kvm_sregs sregs;
1227 int ret;
1228 int i;
1230 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1231 if (ret < 0) {
1232 return ret;
1235 if (!cpu->vhyp) {
1236 ppc_store_sdr1(env, sregs.u.s.sdr1);
1239 /* Sync SLB */
1240 #ifdef TARGET_PPC64
1242 * The packed SLB array we get from KVM_GET_SREGS only contains
1243 * information about valid entries. So we flush our internal copy
1244 * to get rid of stale ones, then put all valid SLB entries back
1245 * in.
1247 memset(env->slb, 0, sizeof(env->slb));
1248 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1249 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1250 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1252 * Only restore valid entries
1254 if (rb & SLB_ESID_V) {
1255 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1258 #endif
1260 /* Sync SRs */
1261 for (i = 0; i < 16; i++) {
1262 env->sr[i] = sregs.u.s.ppc32.sr[i];
1265 /* Sync BATs */
1266 for (i = 0; i < 8; i++) {
1267 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1268 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1269 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1270 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1273 return 0;
1276 int kvm_arch_get_registers(CPUState *cs)
1278 PowerPCCPU *cpu = POWERPC_CPU(cs);
1279 CPUPPCState *env = &cpu->env;
1280 struct kvm_regs regs;
1281 uint32_t cr;
1282 int i, ret;
1284 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1285 if (ret < 0)
1286 return ret;
1288 cr = regs.cr;
1289 for (i = 7; i >= 0; i--) {
1290 env->crf[i] = cr & 15;
1291 cr >>= 4;
1294 env->ctr = regs.ctr;
1295 env->lr = regs.lr;
1296 cpu_write_xer(env, regs.xer);
1297 env->msr = regs.msr;
1298 env->nip = regs.pc;
1300 env->spr[SPR_SRR0] = regs.srr0;
1301 env->spr[SPR_SRR1] = regs.srr1;
1303 env->spr[SPR_SPRG0] = regs.sprg0;
1304 env->spr[SPR_SPRG1] = regs.sprg1;
1305 env->spr[SPR_SPRG2] = regs.sprg2;
1306 env->spr[SPR_SPRG3] = regs.sprg3;
1307 env->spr[SPR_SPRG4] = regs.sprg4;
1308 env->spr[SPR_SPRG5] = regs.sprg5;
1309 env->spr[SPR_SPRG6] = regs.sprg6;
1310 env->spr[SPR_SPRG7] = regs.sprg7;
1312 env->spr[SPR_BOOKE_PID] = regs.pid;
1314 for (i = 0;i < 32; i++)
1315 env->gpr[i] = regs.gpr[i];
1317 kvm_get_fp(cs);
1319 if (cap_booke_sregs) {
1320 ret = kvmppc_get_booke_sregs(cpu);
1321 if (ret < 0) {
1322 return ret;
1326 if (cap_segstate) {
1327 ret = kvmppc_get_books_sregs(cpu);
1328 if (ret < 0) {
1329 return ret;
1333 if (cap_hior) {
1334 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1337 if (cap_one_reg) {
1338 int i;
1340 /* We deliberately ignore errors here, for kernels which have
1341 * the ONE_REG calls, but don't support the specific
1342 * registers, there's a reasonable chance things will still
1343 * work, at least until we try to migrate. */
1344 for (i = 0; i < 1024; i++) {
1345 uint64_t id = env->spr_cb[i].one_reg_id;
1347 if (id != 0) {
1348 kvm_get_one_spr(cs, id, i);
1352 #ifdef TARGET_PPC64
1353 if (msr_ts) {
1354 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1357 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1368 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1369 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1372 if (cap_papr) {
1373 if (kvm_get_vpa(cs) < 0) {
1374 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1378 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1379 #endif
1382 return 0;
1385 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1387 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1389 if (irq != PPC_INTERRUPT_EXT) {
1390 return 0;
1393 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1394 return 0;
1397 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1399 return 0;
/* Select the external-interrupt input pin constant for the build target */
#ifdef TARGET_PPCEMB
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1410 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1412 PowerPCCPU *cpu = POWERPC_CPU(cs);
1413 CPUPPCState *env = &cpu->env;
1414 int r;
1415 unsigned irq;
1417 qemu_mutex_lock_iothread();
1419 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1420 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1421 if (!cap_interrupt_level &&
1422 run->ready_for_interrupt_injection &&
1423 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1424 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1426 /* For now KVM disregards the 'irq' argument. However, in the
1427 * future KVM could cache it in-kernel to avoid a heavyweight exit
1428 * when reading the UIC.
1430 irq = KVM_INTERRUPT_SET;
1432 DPRINTF("injected interrupt %d\n", irq);
1433 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1434 if (r < 0) {
1435 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1438 /* Always wake up soon in case the interrupt was level based */
1439 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1440 (NANOSECONDS_PER_SECOND / 50));
1443 /* We don't know if there are more interrupts pending after this. However,
1444 * the guest will return to userspace in the course of handling this one
1445 * anyways, so we will get a chance to deliver the rest. */
1447 qemu_mutex_unlock_iothread();
1450 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1452 return MEMTXATTRS_UNSPECIFIED;
1455 int kvm_arch_process_async_events(CPUState *cs)
1457 return cs->halted;
1460 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1462 CPUState *cs = CPU(cpu);
1463 CPUPPCState *env = &cpu->env;
1465 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1466 cs->halted = 1;
1467 cs->exception_index = EXCP_HLT;
1470 return 0;
1473 /* map dcr access to existing qemu dcr emulation */
1474 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1476 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1477 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1479 return 0;
1482 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1484 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1485 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1487 return 0;
1490 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1492 /* Mixed endian case is not handled */
1493 uint32_t sc = debug_inst_opcode;
1495 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1496 sizeof(sc), 0) ||
1497 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1498 return -EINVAL;
1501 return 0;
1504 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1506 uint32_t sc;
1508 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1509 sc != debug_inst_opcode ||
1510 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1511 sizeof(sc), 1)) {
1512 return -EINVAL;
1515 return 0;
1518 static int find_hw_breakpoint(target_ulong addr, int type)
1520 int n;
1522 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1523 <= ARRAY_SIZE(hw_debug_points));
1525 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1526 if (hw_debug_points[n].addr == addr &&
1527 hw_debug_points[n].type == type) {
1528 return n;
1532 return -1;
1535 static int find_hw_watchpoint(target_ulong addr, int *flag)
1537 int n;
1539 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1540 if (n >= 0) {
1541 *flag = BP_MEM_ACCESS;
1542 return n;
1545 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1546 if (n >= 0) {
1547 *flag = BP_MEM_WRITE;
1548 return n;
1551 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1552 if (n >= 0) {
1553 *flag = BP_MEM_READ;
1554 return n;
1557 return -1;
1560 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1561 target_ulong len, int type)
1563 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1564 return -ENOBUFS;
1567 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1568 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1570 switch (type) {
1571 case GDB_BREAKPOINT_HW:
1572 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1573 return -ENOBUFS;
1576 if (find_hw_breakpoint(addr, type) >= 0) {
1577 return -EEXIST;
1580 nb_hw_breakpoint++;
1581 break;
1583 case GDB_WATCHPOINT_WRITE:
1584 case GDB_WATCHPOINT_READ:
1585 case GDB_WATCHPOINT_ACCESS:
1586 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1587 return -ENOBUFS;
1590 if (find_hw_breakpoint(addr, type) >= 0) {
1591 return -EEXIST;
1594 nb_hw_watchpoint++;
1595 break;
1597 default:
1598 return -ENOSYS;
1601 return 0;
1604 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1605 target_ulong len, int type)
1607 int n;
1609 n = find_hw_breakpoint(addr, type);
1610 if (n < 0) {
1611 return -ENOENT;
1614 switch (type) {
1615 case GDB_BREAKPOINT_HW:
1616 nb_hw_breakpoint--;
1617 break;
1619 case GDB_WATCHPOINT_WRITE:
1620 case GDB_WATCHPOINT_READ:
1621 case GDB_WATCHPOINT_ACCESS:
1622 nb_hw_watchpoint--;
1623 break;
1625 default:
1626 return -ENOSYS;
1628 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1630 return 0;
1633 void kvm_arch_remove_all_hw_breakpoints(void)
1635 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1638 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1640 int n;
1642 /* Software Breakpoint updates */
1643 if (kvm_sw_breakpoints_active(cs)) {
1644 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1647 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1648 <= ARRAY_SIZE(hw_debug_points));
1649 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1651 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1652 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1653 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1654 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1655 switch (hw_debug_points[n].type) {
1656 case GDB_BREAKPOINT_HW:
1657 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1658 break;
1659 case GDB_WATCHPOINT_WRITE:
1660 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1661 break;
1662 case GDB_WATCHPOINT_READ:
1663 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1664 break;
1665 case GDB_WATCHPOINT_ACCESS:
1666 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1667 KVMPPC_DEBUG_WATCH_READ;
1668 break;
1669 default:
1670 cpu_abort(cs, "Unsupported breakpoint type\n");
1672 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1677 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1679 CPUState *cs = CPU(cpu);
1680 CPUPPCState *env = &cpu->env;
1681 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1682 int handle = 0;
1683 int n;
1684 int flag = 0;
1686 if (cs->singlestep_enabled) {
1687 handle = 1;
1688 } else if (arch_info->status) {
1689 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1690 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1691 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1692 if (n >= 0) {
1693 handle = 1;
1695 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1696 KVMPPC_DEBUG_WATCH_WRITE)) {
1697 n = find_hw_watchpoint(arch_info->address, &flag);
1698 if (n >= 0) {
1699 handle = 1;
1700 cs->watchpoint_hit = &hw_watchpoint;
1701 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1702 hw_watchpoint.flags = flag;
1706 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1707 handle = 1;
1708 } else {
1709 /* QEMU is not able to handle debug exception, so inject
1710 * program exception to guest;
1711 * Yes program exception NOT debug exception !!
1712 * When QEMU is using debug resources then debug exception must
1713 * be always set. To achieve this we set MSR_DE and also set
1714 * MSRP_DEP so guest cannot change MSR_DE.
1715 * When emulating debug resource for guest we want guest
1716 * to control MSR_DE (enable/disable debug interrupt on need).
1717 * Supporting both configurations are NOT possible.
1718 * So the result is that we cannot share debug resources
1719 * between QEMU and Guest on BOOKE architecture.
1720 * In the current design QEMU gets the priority over guest,
1721 * this means that if QEMU is using debug resources then guest
1722 * cannot use them;
1723 * For software breakpoint QEMU uses a privileged instruction;
1724 * So there cannot be any reason that we are here for guest
1725 * set debug exception, only possibility is guest executed a
1726 * privileged / illegal instruction and that's why we are
1727 * injecting a program interrupt.
1730 cpu_synchronize_state(cs);
1731 /* env->nip is PC, so increment this by 4 to use
1732 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1734 env->nip += 4;
1735 cs->exception_index = POWERPC_EXCP_PROGRAM;
1736 env->error_code = POWERPC_EXCP_INVAL;
1737 ppc_cpu_do_interrupt(cs);
1740 return handle;
1743 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1745 PowerPCCPU *cpu = POWERPC_CPU(cs);
1746 CPUPPCState *env = &cpu->env;
1747 int ret;
1749 qemu_mutex_lock_iothread();
1751 switch (run->exit_reason) {
1752 case KVM_EXIT_DCR:
1753 if (run->dcr.is_write) {
1754 DPRINTF("handle dcr write\n");
1755 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1756 } else {
1757 DPRINTF("handle dcr read\n");
1758 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1760 break;
1761 case KVM_EXIT_HLT:
1762 DPRINTF("handle halt\n");
1763 ret = kvmppc_handle_halt(cpu);
1764 break;
1765 #if defined(TARGET_PPC64)
1766 case KVM_EXIT_PAPR_HCALL:
1767 DPRINTF("handle PAPR hypercall\n");
1768 run->papr_hcall.ret = spapr_hypercall(cpu,
1769 run->papr_hcall.nr,
1770 run->papr_hcall.args);
1771 ret = 0;
1772 break;
1773 #endif
1774 case KVM_EXIT_EPR:
1775 DPRINTF("handle epr\n");
1776 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1777 ret = 0;
1778 break;
1779 case KVM_EXIT_WATCHDOG:
1780 DPRINTF("handle watchdog expiry\n");
1781 watchdog_perform_action();
1782 ret = 0;
1783 break;
1785 case KVM_EXIT_DEBUG:
1786 DPRINTF("handle debug exception\n");
1787 if (kvm_handle_debug(cpu, run)) {
1788 ret = EXCP_DEBUG;
1789 break;
1791 /* re-enter, this exception was guest-internal */
1792 ret = 0;
1793 break;
1795 default:
1796 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1797 ret = -1;
1798 break;
1801 qemu_mutex_unlock_iothread();
1802 return ret;
1805 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1807 CPUState *cs = CPU(cpu);
1808 uint32_t bits = tsr_bits;
1809 struct kvm_one_reg reg = {
1810 .id = KVM_REG_PPC_OR_TSR,
1811 .addr = (uintptr_t) &bits,
1814 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1817 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1820 CPUState *cs = CPU(cpu);
1821 uint32_t bits = tsr_bits;
1822 struct kvm_one_reg reg = {
1823 .id = KVM_REG_PPC_CLEAR_TSR,
1824 .addr = (uintptr_t) &bits,
1827 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1830 int kvmppc_set_tcr(PowerPCCPU *cpu)
1832 CPUState *cs = CPU(cpu);
1833 CPUPPCState *env = &cpu->env;
1834 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1836 struct kvm_one_reg reg = {
1837 .id = KVM_REG_PPC_TCR,
1838 .addr = (uintptr_t) &tcr,
1841 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1844 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1846 CPUState *cs = CPU(cpu);
1847 int ret;
1849 if (!kvm_enabled()) {
1850 return -1;
1853 if (!cap_ppc_watchdog) {
1854 printf("warning: KVM does not support watchdog");
1855 return -1;
1858 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1859 if (ret < 0) {
1860 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1861 __func__, strerror(-ret));
1862 return ret;
1865 return ret;
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line) into @value, a buffer of @len bytes.
 *
 * Returns 0 when a line was found, -1 when the file cannot be opened or
 * no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int found = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (!fp) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        if (!*line) {
            /* An empty string also ends the scan */
            break;
        }
    }

    fclose(fp);

    return found;
}
1896 uint32_t kvmppc_get_tbfreq(void)
1898 char line[512];
1899 char *ns;
1900 uint32_t retval = NANOSECONDS_PER_SECOND;
1902 if (read_cpuinfo("timebase", line, sizeof(line))) {
1903 return retval;
1906 if (!(ns = strchr(line, ':'))) {
1907 return retval;
1910 ns++;
1912 return atoi(ns);
/*
 * Load the contents of /proc/device-tree/system-id into *value.
 * Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
/*
 * Load the contents of /proc/device-tree/model into *value.
 * Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1926 /* Try to find a device tree node for a CPU with clock-frequency property */
1927 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1929 struct dirent *dirp;
1930 DIR *dp;
1932 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1933 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1934 return -1;
1937 buf[0] = '\0';
1938 while ((dirp = readdir(dp)) != NULL) {
1939 FILE *f;
1940 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1941 dirp->d_name);
1942 f = fopen(buf, "r");
1943 if (f) {
1944 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1945 fclose(f);
1946 break;
1948 buf[0] = '\0';
1950 closedir(dp);
1951 if (buf[0] == '\0') {
1952 printf("Unknown host!\n");
1953 return -1;
1956 return 0;
/*
 * Read a big-endian integer property from a device tree file.
 *
 * Returns the value converted to host order when the file holds exactly
 * 4 or 8 bytes (more than 8 bytes are never read), (uint64_t)-1 when the
 * file cannot be opened and 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *fp = fopen(filename, "rb");

    if (!fp) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1986 /* Read a CPU node property from the host device tree that's a single
1987 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1988 * (can't find or open the property, or doesn't understand the
1989 * format) */
1990 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1992 char buf[PATH_MAX], *tmp;
1993 uint64_t val;
1995 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1996 return -1;
1999 tmp = g_strdup_printf("%s/%s", buf, propname);
2000 val = kvmppc_read_int_dt(tmp);
2001 g_free(tmp);
2003 return val;
/* Value of the host CPU node's "clock-frequency" device tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/*
 * Value of the host CPU node's "ibm,vmx" device tree property, truncated
 * to 32 bits (so a failed lookup reads back as (uint32_t)-1).
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
/*
 * Value of the host CPU node's "ibm,dfp" device tree property, truncated
 * to 32 bits (so a failed lookup reads back as (uint32_t)-1).
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
2021 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2023 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2024 CPUState *cs = CPU(cpu);
2026 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2027 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2028 return 0;
2031 return 1;
2034 int kvmppc_get_hasidle(CPUPPCState *env)
2036 struct kvm_ppc_pvinfo pvinfo;
2038 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2039 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2040 return 1;
2043 return 0;
2046 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2048 uint32_t *hc = (uint32_t*)buf;
2049 struct kvm_ppc_pvinfo pvinfo;
2051 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2052 memcpy(buf, pvinfo.hcall, buf_len);
2053 return 0;
2057 * Fallback to always fail hypercalls regardless of endianness:
2059 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2060 * li r3, -1
2061 * b .+8 (becomes nop in wrong endian)
2062 * bswap32(li r3, -1)
2065 hc[0] = cpu_to_be32(0x08000048);
2066 hc[1] = cpu_to_be32(0x3860ffff);
2067 hc[2] = cpu_to_be32(0x48000008);
2068 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2070 return 1;
2073 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2075 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2078 void kvmppc_enable_logical_ci_hcalls(void)
2081 * FIXME: it would be nice if we could detect the cases where
2082 * we're using a device which requires the in kernel
2083 * implementation of these hcalls, but the kernel lacks them and
2084 * produce a warning.
2086 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2087 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2090 void kvmppc_enable_set_mode_hcall(void)
2092 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2095 void kvmppc_enable_clear_ref_mod_hcalls(void)
2097 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2098 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2101 void kvmppc_set_papr(PowerPCCPU *cpu)
2103 CPUState *cs = CPU(cpu);
2104 int ret;
2106 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2107 if (ret) {
2108 error_report("This vCPU type or KVM version does not support PAPR");
2109 exit(1);
2112 /* Update the capability flag so we sync the right information
2113 * with kvm */
2114 cap_papr = 1;
2117 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2119 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2122 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2124 CPUState *cs = CPU(cpu);
2125 int ret;
2127 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2128 if (ret && mpic_proxy) {
2129 error_report("This KVM version does not support EPR");
2130 exit(1);
2134 int kvmppc_smt_threads(void)
2136 return cap_ppc_smt ? cap_ppc_smt : 1;
2139 int kvmppc_set_smt_threads(int smt)
2141 int ret;
2143 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2144 if (!ret) {
2145 cap_ppc_smt = smt;
2147 return ret;
2150 void kvmppc_hint_smt_possible(Error **errp)
2152 int i;
2153 GString *g;
2154 char *s;
2156 assert(kvm_enabled());
2157 if (cap_ppc_smt_possible) {
2158 g = g_string_new("Available VSMT modes:");
2159 for (i = 63; i >= 0; i--) {
2160 if ((1UL << i) & cap_ppc_smt_possible) {
2161 g_string_append_printf(g, " %lu", (1UL << i));
2164 s = g_string_free(g, false);
2165 error_append_hint(errp, "%s.\n", s);
2166 g_free(s);
2167 } else {
2168 error_append_hint(errp,
2169 "This KVM seems to be too old to support VSMT.\n");
2174 #ifdef TARGET_PPC64
2175 off_t kvmppc_alloc_rma(void **rma)
2177 off_t size;
2178 int fd;
2179 struct kvm_allocate_rma ret;
2181 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2182 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2183 * not necessary on this hardware
2184 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2186 * FIXME: We should allow the user to force contiguous RMA
2187 * allocation in the cap_ppc_rma==1 case.
2189 if (cap_ppc_rma < 2) {
2190 return 0;
2193 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2194 if (fd < 0) {
2195 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2196 strerror(errno));
2197 return -1;
2200 size = MIN(ret.rma_size, 256ul << 20);
2202 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2203 if (*rma == MAP_FAILED) {
2204 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2205 return -1;
2208 return size;
2211 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2213 struct kvm_ppc_smmu_info info;
2214 long rampagesize, best_page_shift;
2215 int i;
2217 if (cap_ppc_rma >= 2) {
2218 return current_size;
2221 /* Find the largest hardware supported page size that's less than
2222 * or equal to the (logical) backing page size of guest RAM */
2223 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2224 rampagesize = qemu_getrampagesize();
2225 best_page_shift = 0;
2227 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2228 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2230 if (!sps->page_shift) {
2231 continue;
2234 if ((sps->page_shift > best_page_shift)
2235 && ((1UL << sps->page_shift) <= rampagesize)) {
2236 best_page_shift = sps->page_shift;
2240 return MIN(current_size,
2241 1ULL << (best_page_shift + hash_shift - 7));
2243 #endif
2245 bool kvmppc_spapr_use_multitce(void)
2247 return cap_spapr_multitce;
2250 int kvmppc_spapr_enable_inkernel_multitce(void)
2252 int ret;
2254 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2255 H_PUT_TCE_INDIRECT, 1);
2256 if (!ret) {
2257 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2258 H_STUFF_TCE, 1);
2261 return ret;
2264 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2265 uint64_t bus_offset, uint32_t nb_table,
2266 int *pfd, bool need_vfio)
2268 long len;
2269 int fd;
2270 void *table;
2272 /* Must set fd to -1 so we don't try to munmap when called for
2273 * destroying the table, which the upper layers -will- do
2275 *pfd = -1;
2276 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2277 return NULL;
2280 if (cap_spapr_tce_64) {
2281 struct kvm_create_spapr_tce_64 args = {
2282 .liobn = liobn,
2283 .page_shift = page_shift,
2284 .offset = bus_offset >> page_shift,
2285 .size = nb_table,
2286 .flags = 0
2288 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2289 if (fd < 0) {
2290 fprintf(stderr,
2291 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2292 liobn);
2293 return NULL;
2295 } else if (cap_spapr_tce) {
2296 uint64_t window_size = (uint64_t) nb_table << page_shift;
2297 struct kvm_create_spapr_tce args = {
2298 .liobn = liobn,
2299 .window_size = window_size,
2301 if ((window_size != args.window_size) || bus_offset) {
2302 return NULL;
2304 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2305 if (fd < 0) {
2306 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2307 liobn);
2308 return NULL;
2310 } else {
2311 return NULL;
2314 len = nb_table * sizeof(uint64_t);
2315 /* FIXME: round this up to page size */
2317 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2318 if (table == MAP_FAILED) {
2319 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2320 liobn);
2321 close(fd);
2322 return NULL;
2325 *pfd = fd;
2326 return table;
/*
 * Unmap a TCE table created by kvmppc_create_spapr_tce() and close its
 * descriptor.
 *
 * Returns -1 when @fd is negative (nothing to tear down) and 0
 * otherwise; an munmap()/close() failure is only reported on stderr.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Newline-terminated so the message does not run into the next
         * line of output. */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2348 int kvmppc_reset_htab(int shift_hint)
2350 uint32_t shift = shift_hint;
2352 if (!kvm_enabled()) {
2353 /* Full emulation, tell caller to allocate htab itself */
2354 return 0;
2356 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2357 int ret;
2358 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2359 if (ret == -ENOTTY) {
2360 /* At least some versions of PR KVM advertise the
2361 * capability, but don't implement the ioctl(). Oops.
2362 * Return 0 so that we allocate the htab in qemu, as is
2363 * correct for PR. */
2364 return 0;
2365 } else if (ret < 0) {
2366 return ret;
2368 return shift;
2371 /* We have a kernel that predates the htab reset calls. For PR
2372 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2373 * this era, it has allocated a 16MB fixed size hash table already. */
2374 if (kvmppc_is_pr(kvm_state)) {
2375 /* PR - tell caller to allocate htab */
2376 return 0;
2377 } else {
2378 /* HV - assume 16MB kernel allocated htab */
2379 return 24;
2383 static inline uint32_t mfpvr(void)
2385 uint32_t pvr;
2387 asm ("mfpvr %0"
2388 : "=r"(pvr));
2389 return pvr;
/* Set (@on true) or clear (@on false) the @flags bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2401 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2403 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2404 uint32_t vmx = kvmppc_get_vmx();
2405 uint32_t dfp = kvmppc_get_dfp();
2406 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2407 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2409 /* Now fix up the class with information we can query from the host */
2410 pcc->pvr = mfpvr();
2412 if (vmx != -1) {
2413 /* Only override when we know what the host supports */
2414 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2415 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2417 if (dfp != -1) {
2418 /* Only override when we know what the host supports */
2419 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2422 if (dcache_size != -1) {
2423 pcc->l1_dcache_size = dcache_size;
2426 if (icache_size != -1) {
2427 pcc->l1_icache_size = icache_size;
2430 #if defined(TARGET_PPC64)
2431 pcc->radix_page_info = kvm_get_radix_page_info();
2433 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2435 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2436 * compliant. More importantly, advertising ISA 3.00
2437 * architected mode may prevent guests from activating
2438 * necessary DD1 workarounds.
2440 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2441 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2443 #endif /* defined(TARGET_PPC64) */
2446 bool kvmppc_has_cap_epr(void)
2448 return cap_epr;
2451 bool kvmppc_has_cap_htab_fd(void)
2453 return cap_htab_fd;
2456 bool kvmppc_has_cap_fixup_hcalls(void)
2458 return cap_fixup_hcalls;
2461 bool kvmppc_has_cap_htm(void)
2463 return cap_htm;
2466 bool kvmppc_has_cap_mmu_radix(void)
2468 return cap_mmu_radix;
2471 bool kvmppc_has_cap_mmu_hash_v3(void)
2473 return cap_mmu_hash_v3;
2476 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2478 uint32_t host_pvr = mfpvr();
2479 PowerPCCPUClass *pvr_pcc;
2481 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2482 if (pvr_pcc == NULL) {
2483 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2486 return pvr_pcc;
2489 static int kvm_ppc_register_host_cpu_type(void)
2491 TypeInfo type_info = {
2492 .name = TYPE_HOST_POWERPC_CPU,
2493 .class_init = kvmppc_host_cpu_class_init,
2495 PowerPCCPUClass *pvr_pcc;
2496 ObjectClass *oc;
2497 DeviceClass *dc;
2498 int i;
2500 pvr_pcc = kvm_ppc_get_host_cpu_class();
2501 if (pvr_pcc == NULL) {
2502 return -1;
2504 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2505 type_register(&type_info);
2507 oc = object_class_by_name(type_info.name);
2508 g_assert(oc);
2510 #if defined(TARGET_PPC64)
2511 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2512 type_info.parent = TYPE_SPAPR_CPU_CORE,
2513 type_info.instance_size = sizeof(sPAPRCPUCore);
2514 type_info.instance_init = NULL;
2515 type_info.class_init = spapr_cpu_core_class_init;
2516 type_info.class_data = (void *) "host";
2517 type_register(&type_info);
2518 g_free((void *)type_info.name);
2519 #endif
2522 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2523 * we want "POWER8" to be a "family" alias that points to the current
2524 * host CPU type, too)
2526 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2527 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2528 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2529 char *suffix;
2531 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2532 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2533 if (suffix) {
2534 *suffix = 0;
2536 break;
2540 return 0;
2543 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2545 struct kvm_rtas_token_args args = {
2546 .token = token,
2549 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2550 return -ENOENT;
2553 strncpy(args.name, function, sizeof(args.name));
2555 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2558 int kvmppc_get_htab_fd(bool write)
2560 struct kvm_get_htab_fd s = {
2561 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2562 .start_index = 0,
2565 if (!cap_htab_fd) {
2566 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2567 return -1;
2570 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2573 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2575 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2576 uint8_t buf[bufsize];
2577 ssize_t rc;
2579 do {
2580 rc = read(fd, buf, bufsize);
2581 if (rc < 0) {
2582 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2583 strerror(errno));
2584 return rc;
2585 } else if (rc) {
2586 uint8_t *buffer = buf;
2587 ssize_t n = rc;
2588 while (n) {
2589 struct kvm_get_htab_header *head =
2590 (struct kvm_get_htab_header *) buffer;
2591 size_t chunksize = sizeof(*head) +
2592 HASH_PTE_SIZE_64 * head->n_valid;
2594 qemu_put_be32(f, head->index);
2595 qemu_put_be16(f, head->n_valid);
2596 qemu_put_be16(f, head->n_invalid);
2597 qemu_put_buffer(f, (void *)(head + 1),
2598 HASH_PTE_SIZE_64 * head->n_valid);
2600 buffer += chunksize;
2601 n -= chunksize;
2604 } while ((rc != 0)
2605 && ((max_ns < 0)
2606 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2608 return (rc == 0) ? 1 : 0;
2611 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2612 uint16_t n_valid, uint16_t n_invalid)
2614 struct kvm_get_htab_header *buf;
2615 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2616 ssize_t rc;
2618 buf = alloca(chunksize);
2619 buf->index = index;
2620 buf->n_valid = n_valid;
2621 buf->n_invalid = n_invalid;
2623 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2625 rc = write(fd, buf, chunksize);
2626 if (rc < 0) {
2627 fprintf(stderr, "Error writing KVM hash table: %s\n",
2628 strerror(errno));
2629 return rc;
2631 if (rc != chunksize) {
2632 /* We should never get a short write on a single chunk */
2633 fprintf(stderr, "Short write, restoring KVM hash table\n");
2634 return -1;
2636 return 0;
2639 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2641 return true;
2644 void kvm_arch_init_irq_routing(KVMState *s)
2648 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2650 struct kvm_get_htab_fd ghf = {
2651 .flags = 0,
2652 .start_index = ptex,
2654 int fd, rc;
2655 int i;
2657 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2658 if (fd < 0) {
2659 hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2662 i = 0;
2663 while (i < n) {
2664 struct kvm_get_htab_header *hdr;
2665 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2666 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2668 rc = read(fd, buf, sizeof(buf));
2669 if (rc < 0) {
2670 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2673 hdr = (struct kvm_get_htab_header *)buf;
2674 while ((i < n) && ((char *)hdr < (buf + rc))) {
2675 int invalid = hdr->n_invalid;
2677 if (hdr->index != (ptex + i)) {
2678 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2679 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2682 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2683 i += hdr->n_valid;
2685 if ((n - i) < invalid) {
2686 invalid = n - i;
2688 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2689 i += hdr->n_invalid;
2691 hdr = (struct kvm_get_htab_header *)
2692 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2696 close(fd);
2699 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2701 int fd, rc;
2702 struct kvm_get_htab_fd ghf;
2703 struct {
2704 struct kvm_get_htab_header hdr;
2705 uint64_t pte0;
2706 uint64_t pte1;
2707 } buf;
2709 ghf.flags = 0;
2710 ghf.start_index = 0; /* Ignored */
2711 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2712 if (fd < 0) {
2713 hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2716 buf.hdr.n_valid = 1;
2717 buf.hdr.n_invalid = 0;
2718 buf.hdr.index = ptex;
2719 buf.pte0 = cpu_to_be64(pte0);
2720 buf.pte1 = cpu_to_be64(pte1);
2722 rc = write(fd, &buf, sizeof(buf));
2723 if (rc != sizeof(buf)) {
2724 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2726 close(fd);
2729 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2730 uint64_t address, uint32_t data, PCIDevice *dev)
2732 return 0;
2735 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2736 int vector, PCIDevice *dev)
2738 return 0;
/* Does nothing: @virq is ignored and 0 is returned. */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
/* Map MSI data to a GSI number by keeping only its low 16 bits. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16 = data & 0xffff;

    return low16;
}
2751 int kvmppc_enable_hwrng(void)
2753 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2754 return -1;
2757 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2760 void kvmppc_check_papr_resize_hpt(Error **errp)
2762 if (!kvm_enabled()) {
2763 return; /* No KVM, we're good */
2766 if (cap_resize_hpt) {
2767 return; /* Kernel has explicit support, we're good */
2770 /* Otherwise fallback on looking for PR KVM */
2771 if (kvmppc_is_pr(kvm_state)) {
2772 return;
2775 error_setg(errp,
2776 "Hash page table resizing not available with this KVM version");
2779 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2781 CPUState *cs = CPU(cpu);
2782 struct kvm_ppc_resize_hpt rhpt = {
2783 .flags = flags,
2784 .shift = shift,
2787 if (!cap_resize_hpt) {
2788 return -ENOSYS;
2791 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2794 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2796 CPUState *cs = CPU(cpu);
2797 struct kvm_ppc_resize_hpt rhpt = {
2798 .flags = flags,
2799 .shift = shift,
2802 if (!cap_resize_hpt) {
2803 return -ENOSYS;
2806 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2809 static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
2811 target_ulong sdr1 = arg.target_ptr;
2812 PowerPCCPU *cpu = POWERPC_CPU(cs);
2813 CPUPPCState *env = &cpu->env;
2815 /* This is just for the benefit of PR KVM */
2816 cpu_synchronize_state(cs);
2817 env->spr[SPR_SDR1] = sdr1;
2818 if (kvmppc_put_books_sregs(cpu) < 0) {
2819 error_report("Unable to update SDR1 in KVM");
2820 exit(1);
2824 void kvmppc_update_sdr1(target_ulong sdr1)
2826 CPUState *cs;
2828 CPU_FOREACH(cs) {
2829 run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
2834 * This is a helper function to detect a post migration scenario
2835 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2836 * the guest kernel can't handle a PVR value other than the actual host
2837 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2839 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2840 * (so, we're HV), return true. The workaround itself is done in
2841 * cpu_post_load.
2843 * The order here is important: we'll only check for KVM PR as a
2844 * fallback if the guest kernel can't handle the situation itself.
2845 * We need to avoid as much as possible querying the running KVM type
2846 * in QEMU level.
2848 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2850 CPUState *cs = CPU(cpu);
2852 if (!kvm_enabled()) {
2853 return false;
2856 if (cap_ppc_pvr_compat) {
2857 return false;
2860 return !kvmppc_is_pr(cs->kvm_state);