1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #if defined(TARGET_PPC64)
51 #include "hw/ppc/spapr_cpu_core.h"
52 #endif
53 #include "elf.h"
54 #include "sysemu/kvm_int.h"
56 //#define DEBUG_KVM
58 #ifdef DEBUG_KVM
59 #define DPRINTF(fmt, ...) \
60 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
61 #else
62 #define DPRINTF(fmt, ...) \
63 do { } while (0)
64 #endif
66 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69 KVM_CAP_LAST_INFO
72 static int cap_interrupt_unset = false;
73 static int cap_interrupt_level = false;
74 static int cap_segstate;
75 static int cap_booke_sregs;
76 static int cap_ppc_smt;
77 static int cap_ppc_rma;
78 static int cap_spapr_tce;
79 static int cap_spapr_tce_64;
80 static int cap_spapr_multitce;
81 static int cap_spapr_vfio;
82 static int cap_hior;
83 static int cap_one_reg;
84 static int cap_epr;
85 static int cap_ppc_watchdog;
86 static int cap_papr;
87 static int cap_htab_fd;
88 static int cap_fixup_hcalls;
89 static int cap_htm; /* Hardware transactional memory support */
90 static int cap_mmu_radix;
91 static int cap_mmu_hash_v3;
92 static int cap_resize_hpt;
94 static uint32_t debug_inst_opcode;
96 /* XXX We have a race condition where we actually have a level triggered
97 * interrupt, but the infrastructure can't expose that yet, so the guest
98 * takes but ignores it, goes to sleep and never gets notified that there's
99 * still an interrupt pending.
101 * As a quick workaround, let's just wake up again 20 ms after we injected
102 * an interrupt. That way we can ensure that we're always reinjecting
103 * interrupts in case the guest swallowed them.
105 static QEMUTimer *idle_timer;
107 static void kvm_kick_cpu(void *opaque)
109 PowerPCCPU *cpu = opaque;
111 qemu_cpu_kick(CPU(cpu));
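/*
 * Note: the idle_timer that drives this callback is set up in
 * kvm_arch_init_vcpu() and re-armed in kvm_arch_pre_run() roughly 20 ms
 * (NANOSECONDS_PER_SECOND / 50) after an interrupt is injected, which
 * implements the wake-up workaround described above.
 */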
114 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
115 * should only be used for fallback tests - generally we should use
116 * explicit capabilities for the features we want, rather than
117 * assuming what is/isn't available depending on the KVM variant. */
118 static bool kvmppc_is_pr(KVMState *ks)
120 /* Assume KVM-PR if the GET_PVINFO capability is available */
121 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
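/*
 * Illustrative use (mirroring the HTM handling in kvm_arch_init_vcpu()
 * further down): a feature known to exist on KVM-HV but lacking an explicit
 * capability can be enabled by a fallback check along the lines of
 *   if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) { ... }
 */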
124 static int kvm_ppc_register_host_cpu_type(void);
126 int kvm_arch_init(MachineState *ms, KVMState *s)
128 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
129 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
130 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
131 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
132 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
133 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
134 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
135 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
136 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
137 cap_spapr_vfio = false;
138 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
139 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
140 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
141 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
142 /* Note: we don't set cap_papr here, because this capability is
143 * only activated after this by kvmppc_set_papr() */
144 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
145 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
146 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
147 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
148 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
149 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
151 if (!cap_interrupt_level) {
152 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
153 "VM to stall at times!\n");
156 kvm_ppc_register_host_cpu_type();
158 return 0;
161 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
163 return 0;
166 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
168 CPUPPCState *cenv = &cpu->env;
169 CPUState *cs = CPU(cpu);
170 struct kvm_sregs sregs;
171 int ret;
173 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
174 /* What we're really trying to say is "if we're on BookE, we use
175 the native PVR for now". This is the only sane way to check
176 it though, so we may mislead users into thinking they can run
177 BookE guests on BookS. Let's hope nobody dares enough :) */
178 return 0;
179 } else {
180 if (!cap_segstate) {
181 fprintf(stderr, "kvm error: missing PVR setting capability\n");
182 return -ENOSYS;
186 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
187 if (ret) {
188 return ret;
191 sregs.pvr = cenv->spr[SPR_PVR];
192 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
195 /* Set up a shared TLB array with KVM */
196 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
198 CPUPPCState *env = &cpu->env;
199 CPUState *cs = CPU(cpu);
200 struct kvm_book3e_206_tlb_params params = {};
201 struct kvm_config_tlb cfg = {};
202 unsigned int entries = 0;
203 int ret, i;
205 if (!kvm_enabled() ||
206 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
207 return 0;
210 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
212 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
213 params.tlb_sizes[i] = booke206_tlb_size(env, i);
214 params.tlb_ways[i] = booke206_tlb_ways(env, i);
215 entries += params.tlb_sizes[i];
218 assert(entries == env->nb_tlb);
219 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
221 env->tlb_dirty = true;
223 cfg.array = (uintptr_t)env->tlb.tlbm;
224 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
225 cfg.params = (uintptr_t)&params;
226 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
228 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
229 if (ret < 0) {
230 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
231 __func__, strerror(-ret));
232 return ret;
235 env->kvm_sw_tlb = true;
236 return 0;
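/*
 * After this succeeds, env->tlb.tlbm is shared with KVM: QEMU marks local
 * modifications via env->tlb_dirty and pushes them back with
 * kvm_sw_tlb_put() (KVM_DIRTY_TLB) from kvm_arch_put_registers() below.
 */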
240 #if defined(TARGET_PPC64)
241 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
242 struct kvm_ppc_smmu_info *info)
244 CPUPPCState *env = &cpu->env;
245 CPUState *cs = CPU(cpu);
247 memset(info, 0, sizeof(*info));
249 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
250 * need to "guess" what the supported page sizes are.
252 * For that to work we make a few assumptions:
254 * - Check whether we are running "PR" KVM which only supports 4K
255 * and 16M pages, but supports them regardless of the backing
256 * store characteristics. We also don't support 1T segments.
258 * This is safe because if HV KVM ever supports that capability or PR
259 * KVM grows support for more page/segment sizes, those versions
260 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
261 * will not hit this fallback
263 * - Else we are running HV KVM. This means we only support page
264 * sizes that fit in the backing store. Additionally we only
265 * advertise 64K pages if the processor is ARCH 2.06 and we assume
266 * P7 encodings for the SLB and hash table. Here too, we assume
267 * support for any newer processor will mean a kernel that
268 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
269 * this fallback.
271 if (kvmppc_is_pr(cs->kvm_state)) {
272 /* No flags */
273 info->flags = 0;
274 info->slb_size = 64;
276 /* Standard 4k base page size segment */
277 info->sps[0].page_shift = 12;
278 info->sps[0].slb_enc = 0;
279 info->sps[0].enc[0].page_shift = 12;
280 info->sps[0].enc[0].pte_enc = 0;
282 /* Standard 16M large page size segment */
283 info->sps[1].page_shift = 24;
284 info->sps[1].slb_enc = SLB_VSID_L;
285 info->sps[1].enc[0].page_shift = 24;
286 info->sps[1].enc[0].pte_enc = 0;
287 } else {
288 int i = 0;
290 /* HV KVM has backing store size restrictions */
291 info->flags = KVM_PPC_PAGE_SIZES_REAL;
293 if (env->mmu_model & POWERPC_MMU_1TSEG) {
294 info->flags |= KVM_PPC_1T_SEGMENTS;
297 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
298 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
299 info->slb_size = 32;
300 } else {
301 info->slb_size = 64;
304 /* Standard 4k base page size segment */
305 info->sps[i].page_shift = 12;
306 info->sps[i].slb_enc = 0;
307 info->sps[i].enc[0].page_shift = 12;
308 info->sps[i].enc[0].pte_enc = 0;
309 i++;
311 /* 64K on MMU 2.06 and later */
312 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
313 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
314 info->sps[i].page_shift = 16;
315 info->sps[i].slb_enc = 0x110;
316 info->sps[i].enc[0].page_shift = 16;
317 info->sps[i].enc[0].pte_enc = 1;
318 i++;
321 /* Standard 16M large page size segment */
322 info->sps[i].page_shift = 24;
323 info->sps[i].slb_enc = SLB_VSID_L;
324 info->sps[i].enc[0].page_shift = 24;
325 info->sps[i].enc[0].pte_enc = 0;
329 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
331 CPUState *cs = CPU(cpu);
332 int ret;
334 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
335 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
336 if (ret == 0) {
337 return;
341 kvm_get_fallback_smmu_info(cpu, info);
344 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
346 KVMState *s = KVM_STATE(current_machine->accelerator);
347 struct ppc_radix_page_info *radix_page_info;
348 struct kvm_ppc_rmmu_info rmmu_info;
349 int i;
351 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
352 return NULL;
354 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
355 return NULL;
357 radix_page_info = g_malloc0(sizeof(*radix_page_info));
358 radix_page_info->count = 0;
359 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
360 if (rmmu_info.ap_encodings[i]) {
361 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
362 radix_page_info->count++;
365 return radix_page_info;
368 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
369 bool radix, bool gtse,
370 uint64_t proc_tbl)
372 CPUState *cs = CPU(cpu);
373 int ret;
374 uint64_t flags = 0;
375 struct kvm_ppc_mmuv3_cfg cfg = {
376 .process_table = proc_tbl,
379 if (radix) {
380 flags |= KVM_PPC_MMUV3_RADIX;
382 if (gtse) {
383 flags |= KVM_PPC_MMUV3_GTSE;
385 cfg.flags = flags;
386 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
387 switch (ret) {
388 case 0:
389 return H_SUCCESS;
390 case -EINVAL:
391 return H_PARAMETER;
392 case -ENODEV:
393 return H_NOT_AVAILABLE;
394 default:
395 return H_HARDWARE;
399 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
401 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
402 return true;
405 return (1ul << shift) <= rampgsize;
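/*
 * Illustrative arithmetic (hypothetical values): with KVM_PPC_PAGE_SIZES_REAL
 * set and 64 KiB backing pages (rampgsize == 0x10000), 4 KiB (shift 12) and
 * 64 KiB (shift 16) page sizes are accepted, while 16 MiB (shift 24) is
 * rejected because (1ul << 24) > 0x10000.
 */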
408 static long max_cpu_page_size;
410 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
412 static struct kvm_ppc_smmu_info smmu_info;
413 static bool has_smmu_info;
414 CPUPPCState *env = &cpu->env;
415 int iq, ik, jq, jk;
416 bool has_64k_pages = false;
418 /* We only handle page sizes for 64-bit server guests for now */
419 if (!(env->mmu_model & POWERPC_MMU_64)) {
420 return;
423 /* Collect MMU info from kernel if not already */
424 if (!has_smmu_info) {
425 kvm_get_smmu_info(cpu, &smmu_info);
426 has_smmu_info = true;
429 if (!max_cpu_page_size) {
430 max_cpu_page_size = qemu_getrampagesize();
433 /* Convert to QEMU form */
434 memset(&env->sps, 0, sizeof(env->sps));
436 /* If we have HV KVM, we need to forbid CI large pages if our
437 * host page size is smaller than 64K.
439 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
440 env->ci_large_pages = getpagesize() >= 0x10000;
444 * XXX This loop should be an entry wide AND of the capabilities that
445 * the selected CPU has with the capabilities that KVM supports.
447 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
448 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
449 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
451 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
452 ksps->page_shift)) {
453 continue;
455 qsps->page_shift = ksps->page_shift;
456 qsps->slb_enc = ksps->slb_enc;
457 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
458 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
459 ksps->enc[jk].page_shift)) {
460 continue;
462 if (ksps->enc[jk].page_shift == 16) {
463 has_64k_pages = true;
465 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
466 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
467 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
468 break;
471 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
472 break;
475 env->slb_nr = smmu_info.slb_size;
476 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
477 env->mmu_model &= ~POWERPC_MMU_1TSEG;
479 if (!has_64k_pages) {
480 env->mmu_model &= ~POWERPC_MMU_64K;
484 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
486 Object *mem_obj = object_resolve_path(obj_path, NULL);
487 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
488 long pagesize;
490 if (mempath) {
491 pagesize = qemu_mempath_getpagesize(mempath);
492 g_free(mempath);
493 } else {
494 pagesize = getpagesize();
497 return pagesize >= max_cpu_page_size;
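/*
 * Illustrative case (hypothetical sizes): if main guest RAM uses 16 MiB huge
 * pages (so max_cpu_page_size == 16 MiB) but the memory backend at obj_path
 * sits on a 64 KiB mem-path, the check fails; a backend page size of 16 MiB
 * or larger would pass.
 */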
500 #else /* defined (TARGET_PPC64) */
502 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
506 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
508 return true;
511 #endif /* !defined (TARGET_PPC64) */
513 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
515 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
518 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
519 * book3s supports only 1 watchpoint, so an array size
520 * of 4 is sufficient for now.
522 #define MAX_HW_BKPTS 4
524 static struct HWBreakpoint {
525 target_ulong addr;
526 int type;
527 } hw_debug_points[MAX_HW_BKPTS];
529 static CPUWatchpoint hw_watchpoint;
531 /* By default no breakpoints or watchpoints are supported */
532 static int max_hw_breakpoint;
533 static int max_hw_watchpoint;
534 static int nb_hw_breakpoint;
535 static int nb_hw_watchpoint;
537 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
539 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
540 max_hw_breakpoint = 2;
541 max_hw_watchpoint = 2;
544 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
545 fprintf(stderr, "Error initializing h/w breakpoints\n");
546 return;
550 int kvm_arch_init_vcpu(CPUState *cs)
552 PowerPCCPU *cpu = POWERPC_CPU(cs);
553 CPUPPCState *cenv = &cpu->env;
554 int ret;
556 /* Gather server mmu info from KVM and update the CPU state */
557 kvm_fixup_page_sizes(cpu);
559 /* Synchronize sregs with kvm */
560 ret = kvm_arch_sync_sregs(cpu);
561 if (ret) {
562 if (ret == -EINVAL) {
563 error_report("Register sync failed... If you're using kvm-hv.ko,"
564 " only \"-cpu host\" is possible");
566 return ret;
569 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
571 switch (cenv->mmu_model) {
572 case POWERPC_MMU_BOOKE206:
573 /* This target supports access to KVM's guest TLB */
574 ret = kvm_booke206_tlb_init(cpu);
575 break;
576 case POWERPC_MMU_2_07:
577 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
578 /* KVM-HV has transactional memory on POWER8 even without the
579 * KVM_CAP_PPC_HTM extension, so enable it here instead, as
580 * long as it's available to userspace on the host. */
581 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
582 cap_htm = true;
585 break;
586 default:
587 break;
590 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
591 kvmppc_hw_debug_points_init(cenv);
593 return ret;
596 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
598 CPUPPCState *env = &cpu->env;
599 CPUState *cs = CPU(cpu);
600 struct kvm_dirty_tlb dirty_tlb;
601 unsigned char *bitmap;
602 int ret;
604 if (!env->kvm_sw_tlb) {
605 return;
608 bitmap = g_malloc((env->nb_tlb + 7) / 8);
609 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
611 dirty_tlb.bitmap = (uintptr_t)bitmap;
612 dirty_tlb.num_dirty = env->nb_tlb;
614 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
615 if (ret) {
616 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
617 __func__, strerror(-ret));
620 g_free(bitmap);
623 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
625 PowerPCCPU *cpu = POWERPC_CPU(cs);
626 CPUPPCState *env = &cpu->env;
627 union {
628 uint32_t u32;
629 uint64_t u64;
630 } val;
631 struct kvm_one_reg reg = {
632 .id = id,
633 .addr = (uintptr_t) &val,
635 int ret;
637 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
638 if (ret != 0) {
639 trace_kvm_failed_spr_get(spr, strerror(errno));
640 } else {
641 switch (id & KVM_REG_SIZE_MASK) {
642 case KVM_REG_SIZE_U32:
643 env->spr[spr] = val.u32;
644 break;
646 case KVM_REG_SIZE_U64:
647 env->spr[spr] = val.u64;
648 break;
650 default:
651 /* Don't handle this size yet */
652 abort();
657 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
659 PowerPCCPU *cpu = POWERPC_CPU(cs);
660 CPUPPCState *env = &cpu->env;
661 union {
662 uint32_t u32;
663 uint64_t u64;
664 } val;
665 struct kvm_one_reg reg = {
666 .id = id,
667 .addr = (uintptr_t) &val,
669 int ret;
671 switch (id & KVM_REG_SIZE_MASK) {
672 case KVM_REG_SIZE_U32:
673 val.u32 = env->spr[spr];
674 break;
676 case KVM_REG_SIZE_U64:
677 val.u64 = env->spr[spr];
678 break;
680 default:
681 /* Don't handle this size yet */
682 abort();
685 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
686 if (ret != 0) {
687 trace_kvm_failed_spr_set(spr, strerror(errno));
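/*
 * Typical call, as used for HIOR later in this file:
 *   kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 * The register width is taken from the KVM_REG_SIZE_* bits encoded in the id.
 */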
691 static int kvm_put_fp(CPUState *cs)
693 PowerPCCPU *cpu = POWERPC_CPU(cs);
694 CPUPPCState *env = &cpu->env;
695 struct kvm_one_reg reg;
696 int i;
697 int ret;
699 if (env->insns_flags & PPC_FLOAT) {
700 uint64_t fpscr = env->fpscr;
701 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
703 reg.id = KVM_REG_PPC_FPSCR;
704 reg.addr = (uintptr_t)&fpscr;
705 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
706 if (ret < 0) {
707 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
708 return ret;
711 for (i = 0; i < 32; i++) {
712 uint64_t vsr[2];
714 #ifdef HOST_WORDS_BIGENDIAN
715 vsr[0] = float64_val(env->fpr[i]);
716 vsr[1] = env->vsr[i];
717 #else
718 vsr[0] = env->vsr[i];
719 vsr[1] = float64_val(env->fpr[i]);
720 #endif
721 reg.addr = (uintptr_t) &vsr;
722 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
724 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
725 if (ret < 0) {
726 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
727 i, strerror(errno));
728 return ret;
733 if (env->insns_flags & PPC_ALTIVEC) {
734 reg.id = KVM_REG_PPC_VSCR;
735 reg.addr = (uintptr_t)&env->vscr;
736 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
737 if (ret < 0) {
738 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
739 return ret;
742 for (i = 0; i < 32; i++) {
743 reg.id = KVM_REG_PPC_VR(i);
744 reg.addr = (uintptr_t)&env->avr[i];
745 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
746 if (ret < 0) {
747 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
748 return ret;
753 return 0;
756 static int kvm_get_fp(CPUState *cs)
758 PowerPCCPU *cpu = POWERPC_CPU(cs);
759 CPUPPCState *env = &cpu->env;
760 struct kvm_one_reg reg;
761 int i;
762 int ret;
764 if (env->insns_flags & PPC_FLOAT) {
765 uint64_t fpscr;
766 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
768 reg.id = KVM_REG_PPC_FPSCR;
769 reg.addr = (uintptr_t)&fpscr;
770 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
771 if (ret < 0) {
772 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
773 return ret;
774 } else {
775 env->fpscr = fpscr;
778 for (i = 0; i < 32; i++) {
779 uint64_t vsr[2];
781 reg.addr = (uintptr_t) &vsr;
782 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
784 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
785 if (ret < 0) {
786 DPRINTF("Unable to get %s%d from KVM: %s\n",
787 vsx ? "VSR" : "FPR", i, strerror(errno));
788 return ret;
789 } else {
790 #ifdef HOST_WORDS_BIGENDIAN
791 env->fpr[i] = vsr[0];
792 if (vsx) {
793 env->vsr[i] = vsr[1];
795 #else
796 env->fpr[i] = vsr[1];
797 if (vsx) {
798 env->vsr[i] = vsr[0];
800 #endif
805 if (env->insns_flags & PPC_ALTIVEC) {
806 reg.id = KVM_REG_PPC_VSCR;
807 reg.addr = (uintptr_t)&env->vscr;
808 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
809 if (ret < 0) {
810 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
811 return ret;
814 for (i = 0; i < 32; i++) {
815 reg.id = KVM_REG_PPC_VR(i);
816 reg.addr = (uintptr_t)&env->avr[i];
817 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
818 if (ret < 0) {
819 DPRINTF("Unable to get VR%d from KVM: %s\n",
820 i, strerror(errno));
821 return ret;
826 return 0;
829 #if defined(TARGET_PPC64)
830 static int kvm_get_vpa(CPUState *cs)
832 PowerPCCPU *cpu = POWERPC_CPU(cs);
833 CPUPPCState *env = &cpu->env;
834 struct kvm_one_reg reg;
835 int ret;
837 reg.id = KVM_REG_PPC_VPA_ADDR;
838 reg.addr = (uintptr_t)&env->vpa_addr;
839 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
840 if (ret < 0) {
841 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
842 return ret;
845 assert((uintptr_t)&env->slb_shadow_size
846 == ((uintptr_t)&env->slb_shadow_addr + 8));
847 reg.id = KVM_REG_PPC_VPA_SLB;
848 reg.addr = (uintptr_t)&env->slb_shadow_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
850 if (ret < 0) {
851 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
852 strerror(errno));
853 return ret;
856 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
857 reg.id = KVM_REG_PPC_VPA_DTL;
858 reg.addr = (uintptr_t)&env->dtl_addr;
859 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860 if (ret < 0) {
861 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
862 strerror(errno));
863 return ret;
866 return 0;
869 static int kvm_put_vpa(CPUState *cs)
871 PowerPCCPU *cpu = POWERPC_CPU(cs);
872 CPUPPCState *env = &cpu->env;
873 struct kvm_one_reg reg;
874 int ret;
876 /* SLB shadow or DTL can't be registered unless a master VPA is
877 * registered. That means when restoring state, if a VPA *is*
878 * registered, we need to set that up first. If not, we need to
879 * deregister the others before deregistering the master VPA */
880 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
882 if (env->vpa_addr) {
883 reg.id = KVM_REG_PPC_VPA_ADDR;
884 reg.addr = (uintptr_t)&env->vpa_addr;
885 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
886 if (ret < 0) {
887 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
888 return ret;
892 assert((uintptr_t)&env->slb_shadow_size
893 == ((uintptr_t)&env->slb_shadow_addr + 8));
894 reg.id = KVM_REG_PPC_VPA_SLB;
895 reg.addr = (uintptr_t)&env->slb_shadow_addr;
896 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
897 if (ret < 0) {
898 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
899 return ret;
902 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
903 reg.id = KVM_REG_PPC_VPA_DTL;
904 reg.addr = (uintptr_t)&env->dtl_addr;
905 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
906 if (ret < 0) {
907 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
908 strerror(errno));
909 return ret;
912 if (!env->vpa_addr) {
913 reg.id = KVM_REG_PPC_VPA_ADDR;
914 reg.addr = (uintptr_t)&env->vpa_addr;
915 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
916 if (ret < 0) {
917 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
918 return ret;
922 return 0;
924 #endif /* TARGET_PPC64 */
926 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
928 CPUPPCState *env = &cpu->env;
929 struct kvm_sregs sregs;
930 int i;
932 sregs.pvr = env->spr[SPR_PVR];
934 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
936 /* Sync SLB */
937 #ifdef TARGET_PPC64
938 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
939 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
940 if (env->slb[i].esid & SLB_ESID_V) {
941 sregs.u.s.ppc64.slb[i].slbe |= i;
943 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
945 #endif
947 /* Sync SRs */
948 for (i = 0; i < 16; i++) {
949 sregs.u.s.ppc32.sr[i] = env->sr[i];
952 /* Sync BATs */
953 for (i = 0; i < 8; i++) {
954 /* Beware. We have to swap upper and lower bits here */
955 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
956 | env->DBAT[1][i];
957 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
958 | env->IBAT[1][i];
961 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
964 int kvm_arch_put_registers(CPUState *cs, int level)
966 PowerPCCPU *cpu = POWERPC_CPU(cs);
967 CPUPPCState *env = &cpu->env;
968 struct kvm_regs regs;
969 int ret;
970 int i;
972 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
973 if (ret < 0) {
974 return ret;
977 regs.ctr = env->ctr;
978 regs.lr = env->lr;
979 regs.xer = cpu_read_xer(env);
980 regs.msr = env->msr;
981 regs.pc = env->nip;
983 regs.srr0 = env->spr[SPR_SRR0];
984 regs.srr1 = env->spr[SPR_SRR1];
986 regs.sprg0 = env->spr[SPR_SPRG0];
987 regs.sprg1 = env->spr[SPR_SPRG1];
988 regs.sprg2 = env->spr[SPR_SPRG2];
989 regs.sprg3 = env->spr[SPR_SPRG3];
990 regs.sprg4 = env->spr[SPR_SPRG4];
991 regs.sprg5 = env->spr[SPR_SPRG5];
992 regs.sprg6 = env->spr[SPR_SPRG6];
993 regs.sprg7 = env->spr[SPR_SPRG7];
995 regs.pid = env->spr[SPR_BOOKE_PID];
997 for (i = 0;i < 32; i++)
998 regs.gpr[i] = env->gpr[i];
1000 regs.cr = 0;
1001 for (i = 0; i < 8; i++) {
1002 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
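/* For illustration: crf[0] (CR field 0) lands in bits 31:28 of regs.cr and
 * crf[7] in bits 3:0, matching the architected CR layout. */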
1005 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1006 if (ret < 0)
1007 return ret;
1009 kvm_put_fp(cs);
1011 if (env->tlb_dirty) {
1012 kvm_sw_tlb_put(cpu);
1013 env->tlb_dirty = false;
1016 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1017 ret = kvmppc_put_books_sregs(cpu);
1018 if (ret < 0) {
1019 return ret;
1023 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1024 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1027 if (cap_one_reg) {
1028 int i;
1030 /* We deliberately ignore errors here: for kernels which have
1031 * the ONE_REG calls but don't support the specific
1032 * registers, there's a reasonable chance things will still
1033 * work, at least until we try to migrate. */
1034 for (i = 0; i < 1024; i++) {
1035 uint64_t id = env->spr_cb[i].one_reg_id;
1037 if (id != 0) {
1038 kvm_put_one_spr(cs, id, i);
1042 #ifdef TARGET_PPC64
1043 if (msr_ts) {
1044 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1045 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1047 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1048 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1050 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1051 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1052 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1054 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1062 if (cap_papr) {
1063 if (kvm_put_vpa(cs) < 0) {
1064 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1068 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1069 #endif /* TARGET_PPC64 */
1072 return ret;
1075 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1077 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1080 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1082 CPUPPCState *env = &cpu->env;
1083 struct kvm_sregs sregs;
1084 int ret;
1086 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1087 if (ret < 0) {
1088 return ret;
1091 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1092 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1093 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1094 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1095 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1096 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1097 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1098 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1099 env->spr[SPR_DECR] = sregs.u.e.dec;
1100 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1101 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1102 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1105 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1106 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1107 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1108 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1109 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1110 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1113 if (sregs.u.e.features & KVM_SREGS_E_64) {
1114 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1117 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1118 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1121 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1122 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1123 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1124 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1125 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1126 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1127 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1128 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1129 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1130 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1131 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1132 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1133 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1134 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1135 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1136 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1137 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1138 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1139 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1140 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1141 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1142 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1143 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1144 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1145 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1146 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1147 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1148 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1149 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1150 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1151 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1152 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1153 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1155 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1156 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1157 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1158 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1159 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1160 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1161 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1164 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1165 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1166 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1169 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1170 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1171 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1172 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1173 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1177 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1178 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1179 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1180 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1181 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1182 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1183 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1184 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1185 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1186 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1187 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1190 if (sregs.u.e.features & KVM_SREGS_EXP) {
1191 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1194 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1195 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1196 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1199 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1200 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1201 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1202 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1204 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1205 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1206 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1210 return 0;
1213 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1215 CPUPPCState *env = &cpu->env;
1216 struct kvm_sregs sregs;
1217 int ret;
1218 int i;
1220 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1221 if (ret < 0) {
1222 return ret;
1225 if (!cpu->vhyp) {
1226 ppc_store_sdr1(env, sregs.u.s.sdr1);
1229 /* Sync SLB */
1230 #ifdef TARGET_PPC64
1232 * The packed SLB array we get from KVM_GET_SREGS only contains
1233 * information about valid entries. So we flush our internal copy
1234 * to get rid of stale ones, then put all valid SLB entries back
1235 * in.
1237 memset(env->slb, 0, sizeof(env->slb));
1238 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1239 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1240 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1242 * Only restore valid entries
1244 if (rb & SLB_ESID_V) {
1245 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1248 #endif
1250 /* Sync SRs */
1251 for (i = 0; i < 16; i++) {
1252 env->sr[i] = sregs.u.s.ppc32.sr[i];
1255 /* Sync BATs */
1256 for (i = 0; i < 8; i++) {
1257 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1258 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1259 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1260 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1263 return 0;
1266 int kvm_arch_get_registers(CPUState *cs)
1268 PowerPCCPU *cpu = POWERPC_CPU(cs);
1269 CPUPPCState *env = &cpu->env;
1270 struct kvm_regs regs;
1271 uint32_t cr;
1272 int i, ret;
1274 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1275 if (ret < 0)
1276 return ret;
1278 cr = regs.cr;
1279 for (i = 7; i >= 0; i--) {
1280 env->crf[i] = cr & 15;
1281 cr >>= 4;
1284 env->ctr = regs.ctr;
1285 env->lr = regs.lr;
1286 cpu_write_xer(env, regs.xer);
1287 env->msr = regs.msr;
1288 env->nip = regs.pc;
1290 env->spr[SPR_SRR0] = regs.srr0;
1291 env->spr[SPR_SRR1] = regs.srr1;
1293 env->spr[SPR_SPRG0] = regs.sprg0;
1294 env->spr[SPR_SPRG1] = regs.sprg1;
1295 env->spr[SPR_SPRG2] = regs.sprg2;
1296 env->spr[SPR_SPRG3] = regs.sprg3;
1297 env->spr[SPR_SPRG4] = regs.sprg4;
1298 env->spr[SPR_SPRG5] = regs.sprg5;
1299 env->spr[SPR_SPRG6] = regs.sprg6;
1300 env->spr[SPR_SPRG7] = regs.sprg7;
1302 env->spr[SPR_BOOKE_PID] = regs.pid;
1304 for (i = 0;i < 32; i++)
1305 env->gpr[i] = regs.gpr[i];
1307 kvm_get_fp(cs);
1309 if (cap_booke_sregs) {
1310 ret = kvmppc_get_booke_sregs(cpu);
1311 if (ret < 0) {
1312 return ret;
1316 if (cap_segstate) {
1317 ret = kvmppc_get_books_sregs(cpu);
1318 if (ret < 0) {
1319 return ret;
1323 if (cap_hior) {
1324 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1327 if (cap_one_reg) {
1328 int i;
1330 /* We deliberately ignore errors here: for kernels which have
1331 * the ONE_REG calls but don't support the specific
1332 * registers, there's a reasonable chance things will still
1333 * work, at least until we try to migrate. */
1334 for (i = 0; i < 1024; i++) {
1335 uint64_t id = env->spr_cb[i].one_reg_id;
1337 if (id != 0) {
1338 kvm_get_one_spr(cs, id, i);
1342 #ifdef TARGET_PPC64
1343 if (msr_ts) {
1344 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1345 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1347 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1348 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1350 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1351 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1354 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1362 if (cap_papr) {
1363 if (kvm_get_vpa(cs) < 0) {
1364 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1368 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1369 #endif
1372 return 0;
1375 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1377 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1379 if (irq != PPC_INTERRUPT_EXT) {
1380 return 0;
1383 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1384 return 0;
1387 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1389 return 0;
1392 #if defined(TARGET_PPCEMB)
1393 #define PPC_INPUT_INT PPC40x_INPUT_INT
1394 #elif defined(TARGET_PPC64)
1395 #define PPC_INPUT_INT PPC970_INPUT_INT
1396 #else
1397 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1398 #endif
1400 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1402 PowerPCCPU *cpu = POWERPC_CPU(cs);
1403 CPUPPCState *env = &cpu->env;
1404 int r;
1405 unsigned irq;
1407 qemu_mutex_lock_iothread();
1409 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1410 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1411 if (!cap_interrupt_level &&
1412 run->ready_for_interrupt_injection &&
1413 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1414 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1416 /* For now KVM disregards the 'irq' argument. However, in the
1417 * future KVM could cache it in-kernel to avoid a heavyweight exit
1418 * when reading the UIC.
1420 irq = KVM_INTERRUPT_SET;
1422 DPRINTF("injected interrupt %d\n", irq);
1423 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1424 if (r < 0) {
1425 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1428 /* Always wake up soon in case the interrupt was level based */
1429 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1430 (NANOSECONDS_PER_SECOND / 50));
1433 /* We don't know if there are more interrupts pending after this. However,
1434 * the guest will return to userspace in the course of handling this one
1435 * anyway, so we will get a chance to deliver the rest. */
1437 qemu_mutex_unlock_iothread();
1440 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1442 return MEMTXATTRS_UNSPECIFIED;
1445 int kvm_arch_process_async_events(CPUState *cs)
1447 return cs->halted;
1450 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1452 CPUState *cs = CPU(cpu);
1453 CPUPPCState *env = &cpu->env;
1455 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1456 cs->halted = 1;
1457 cs->exception_index = EXCP_HLT;
1460 return 0;
1463 /* map dcr access to existing qemu dcr emulation */
1464 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1466 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1467 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1469 return 0;
1472 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1474 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1475 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1477 return 0;
1480 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1482 /* Mixed endian case is not handled */
1483 uint32_t sc = debug_inst_opcode;
1485 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1486 sizeof(sc), 0) ||
1487 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1488 return -EINVAL;
1491 return 0;
1494 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1496 uint32_t sc;
1498 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1499 sc != debug_inst_opcode ||
1500 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1501 sizeof(sc), 1)) {
1502 return -EINVAL;
1505 return 0;
1508 static int find_hw_breakpoint(target_ulong addr, int type)
1510 int n;
1512 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1513 <= ARRAY_SIZE(hw_debug_points));
1515 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1516 if (hw_debug_points[n].addr == addr &&
1517 hw_debug_points[n].type == type) {
1518 return n;
1522 return -1;
1525 static int find_hw_watchpoint(target_ulong addr, int *flag)
1527 int n;
1529 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1530 if (n >= 0) {
1531 *flag = BP_MEM_ACCESS;
1532 return n;
1535 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1536 if (n >= 0) {
1537 *flag = BP_MEM_WRITE;
1538 return n;
1541 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1542 if (n >= 0) {
1543 *flag = BP_MEM_READ;
1544 return n;
1547 return -1;
1550 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1551 target_ulong len, int type)
1553 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1554 return -ENOBUFS;
1557 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1558 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1560 switch (type) {
1561 case GDB_BREAKPOINT_HW:
1562 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1563 return -ENOBUFS;
1566 if (find_hw_breakpoint(addr, type) >= 0) {
1567 return -EEXIST;
1570 nb_hw_breakpoint++;
1571 break;
1573 case GDB_WATCHPOINT_WRITE:
1574 case GDB_WATCHPOINT_READ:
1575 case GDB_WATCHPOINT_ACCESS:
1576 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1577 return -ENOBUFS;
1580 if (find_hw_breakpoint(addr, type) >= 0) {
1581 return -EEXIST;
1584 nb_hw_watchpoint++;
1585 break;
1587 default:
1588 return -ENOSYS;
1591 return 0;
1594 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1595 target_ulong len, int type)
1597 int n;
1599 n = find_hw_breakpoint(addr, type);
1600 if (n < 0) {
1601 return -ENOENT;
1604 switch (type) {
1605 case GDB_BREAKPOINT_HW:
1606 nb_hw_breakpoint--;
1607 break;
1609 case GDB_WATCHPOINT_WRITE:
1610 case GDB_WATCHPOINT_READ:
1611 case GDB_WATCHPOINT_ACCESS:
1612 nb_hw_watchpoint--;
1613 break;
1615 default:
1616 return -ENOSYS;
1618 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1620 return 0;
1623 void kvm_arch_remove_all_hw_breakpoints(void)
1625 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1628 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1630 int n;
1632 /* Software Breakpoint updates */
1633 if (kvm_sw_breakpoints_active(cs)) {
1634 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1637 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1638 <= ARRAY_SIZE(hw_debug_points));
1639 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1641 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1642 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1643 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1644 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1645 switch (hw_debug_points[n].type) {
1646 case GDB_BREAKPOINT_HW:
1647 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1648 break;
1649 case GDB_WATCHPOINT_WRITE:
1650 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1651 break;
1652 case GDB_WATCHPOINT_READ:
1653 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1654 break;
1655 case GDB_WATCHPOINT_ACCESS:
1656 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1657 KVMPPC_DEBUG_WATCH_READ;
1658 break;
1659 default:
1660 cpu_abort(cs, "Unsupported breakpoint type\n");
1662 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1667 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1669 CPUState *cs = CPU(cpu);
1670 CPUPPCState *env = &cpu->env;
1671 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1672 int handle = 0;
1673 int n;
1674 int flag = 0;
1676 if (cs->singlestep_enabled) {
1677 handle = 1;
1678 } else if (arch_info->status) {
1679 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1680 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1681 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1682 if (n >= 0) {
1683 handle = 1;
1685 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1686 KVMPPC_DEBUG_WATCH_WRITE)) {
1687 n = find_hw_watchpoint(arch_info->address, &flag);
1688 if (n >= 0) {
1689 handle = 1;
1690 cs->watchpoint_hit = &hw_watchpoint;
1691 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1692 hw_watchpoint.flags = flag;
1696 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1697 handle = 1;
1698 } else {
1699 /* QEMU is not able to handle this debug exception, so inject
1700 * a program exception into the guest;
1701 * yes, a program exception, NOT a debug exception !!
1702 * When QEMU is using debug resources then debug exception must
1703 * be always set. To achieve this we set MSR_DE and also set
1704 * MSRP_DEP so guest cannot change MSR_DE.
1705 * When emulating debug resource for guest we want guest
1706 * to control MSR_DE (enable/disable debug interrupt on need).
1707 * Supporting both configurations is NOT possible.
1708 * So the result is that we cannot share debug resources
1709 * between QEMU and Guest on BOOKE architecture.
1710 * In the current design QEMU gets the priority over guest,
1711 * this means that if QEMU is using debug resources then guest
1712 * cannot use them;
1713 * For software breakpoint QEMU uses a privileged instruction;
1714 * So there cannot be any reason that we are here because the guest
1715 * set a debug exception; the only possibility is that the guest executed
1716 * a privileged / illegal instruction, and that's why we are
1717 * injecting a program interrupt.
1720 cpu_synchronize_state(cs);
1721 /* env->nip is PC, so increment this by 4 to use
1722 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1724 env->nip += 4;
1725 cs->exception_index = POWERPC_EXCP_PROGRAM;
1726 env->error_code = POWERPC_EXCP_INVAL;
1727 ppc_cpu_do_interrupt(cs);
1730 return handle;
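/*
 * Summary of the return value: non-zero means QEMU owns the debug event
 * (single-step, or a hardware/software breakpoint it planted) and the exit
 * is reported to the core loop as EXCP_DEBUG by kvm_arch_handle_exit() below;
 * zero means the event came from the guest and has been converted into a
 * program interrupt above.
 */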
1733 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1735 PowerPCCPU *cpu = POWERPC_CPU(cs);
1736 CPUPPCState *env = &cpu->env;
1737 int ret;
1739 qemu_mutex_lock_iothread();
1741 switch (run->exit_reason) {
1742 case KVM_EXIT_DCR:
1743 if (run->dcr.is_write) {
1744 DPRINTF("handle dcr write\n");
1745 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1746 } else {
1747 DPRINTF("handle dcr read\n");
1748 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1750 break;
1751 case KVM_EXIT_HLT:
1752 DPRINTF("handle halt\n");
1753 ret = kvmppc_handle_halt(cpu);
1754 break;
1755 #if defined(TARGET_PPC64)
1756 case KVM_EXIT_PAPR_HCALL:
1757 DPRINTF("handle PAPR hypercall\n");
1758 run->papr_hcall.ret = spapr_hypercall(cpu,
1759 run->papr_hcall.nr,
1760 run->papr_hcall.args);
1761 ret = 0;
1762 break;
1763 #endif
1764 case KVM_EXIT_EPR:
1765 DPRINTF("handle epr\n");
1766 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1767 ret = 0;
1768 break;
1769 case KVM_EXIT_WATCHDOG:
1770 DPRINTF("handle watchdog expiry\n");
1771 watchdog_perform_action();
1772 ret = 0;
1773 break;
1775 case KVM_EXIT_DEBUG:
1776 DPRINTF("handle debug exception\n");
1777 if (kvm_handle_debug(cpu, run)) {
1778 ret = EXCP_DEBUG;
1779 break;
1781 /* re-enter, this exception was guest-internal */
1782 ret = 0;
1783 break;
1785 default:
1786 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1787 ret = -1;
1788 break;
1791 qemu_mutex_unlock_iothread();
1792 return ret;
1795 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1797 CPUState *cs = CPU(cpu);
1798 uint32_t bits = tsr_bits;
1799 struct kvm_one_reg reg = {
1800 .id = KVM_REG_PPC_OR_TSR,
1801 .addr = (uintptr_t) &bits,
1804 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1807 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1810 CPUState *cs = CPU(cpu);
1811 uint32_t bits = tsr_bits;
1812 struct kvm_one_reg reg = {
1813 .id = KVM_REG_PPC_CLEAR_TSR,
1814 .addr = (uintptr_t) &bits,
1817 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1820 int kvmppc_set_tcr(PowerPCCPU *cpu)
1822 CPUState *cs = CPU(cpu);
1823 CPUPPCState *env = &cpu->env;
1824 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1826 struct kvm_one_reg reg = {
1827 .id = KVM_REG_PPC_TCR,
1828 .addr = (uintptr_t) &tcr,
1831 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1834 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1836 CPUState *cs = CPU(cpu);
1837 int ret;
1839 if (!kvm_enabled()) {
1840 return -1;
1843 if (!cap_ppc_watchdog) {
1844 printf("warning: KVM does not support watchdog");
1845 return -1;
1848 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1849 if (ret < 0) {
1850 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1851 __func__, strerror(-ret));
1852 return ret;
1855 return ret;
1858 static int read_cpuinfo(const char *field, char *value, int len)
1860 FILE *f;
1861 int ret = -1;
1862 int field_len = strlen(field);
1863 char line[512];
1865 f = fopen("/proc/cpuinfo", "r");
1866 if (!f) {
1867 return -1;
1870 do {
1871 if (!fgets(line, sizeof(line), f)) {
1872 break;
1874 if (!strncmp(line, field, field_len)) {
1875 pstrcpy(value, len, line);
1876 ret = 0;
1877 break;
1879 } while(*line);
1881 fclose(f);
1883 return ret;
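/*
 * Example: read_cpuinfo("timebase", line, sizeof(line)) copies the whole
 * "timebase : NNNNNNNN" line from /proc/cpuinfo into 'line';
 * kvmppc_get_tbfreq() below then parses the number after the colon.
 */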
1886 uint32_t kvmppc_get_tbfreq(void)
1888 char line[512];
1889 char *ns;
1890 uint32_t retval = NANOSECONDS_PER_SECOND;
1892 if (read_cpuinfo("timebase", line, sizeof(line))) {
1893 return retval;
1896 if (!(ns = strchr(line, ':'))) {
1897 return retval;
1900 ns++;
1902 return atoi(ns);
1905 bool kvmppc_get_host_serial(char **value)
1907 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1908 NULL);
1911 bool kvmppc_get_host_model(char **value)
1913 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1916 /* Try to find a device tree node for a CPU with clock-frequency property */
1917 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1919 struct dirent *dirp;
1920 DIR *dp;
1922 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1923 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1924 return -1;
1927 buf[0] = '\0';
1928 while ((dirp = readdir(dp)) != NULL) {
1929 FILE *f;
1930 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1931 dirp->d_name);
1932 f = fopen(buf, "r");
1933 if (f) {
1934 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1935 fclose(f);
1936 break;
1938 buf[0] = '\0';
1940 closedir(dp);
1941 if (buf[0] == '\0') {
1942 printf("Unknown host!\n");
1943 return -1;
1946 return 0;
1949 static uint64_t kvmppc_read_int_dt(const char *filename)
1951 union {
1952 uint32_t v32;
1953 uint64_t v64;
1954 } u;
1955 FILE *f;
1956 int len;
1958 f = fopen(filename, "rb");
1959 if (!f) {
1960 return -1;
1963 len = fread(&u, 1, sizeof(u), f);
1964 fclose(f);
1965 switch (len) {
1966 case 4:
1967 /* property is a 32-bit quantity */
1968 return be32_to_cpu(u.v32);
1969 case 8:
1970 return be64_to_cpu(u.v64);
1973 return 0;
1976 /* Read a CPU node property from the host device tree that's a single
1977 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1978 * (can't find or open the property, or doesn't understand the
1979 * format) */
1980 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1982 char buf[PATH_MAX], *tmp;
1983 uint64_t val;
1985 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1986 return -1;
1989 tmp = g_strdup_printf("%s/%s", buf, propname);
1990 val = kvmppc_read_int_dt(tmp);
1991 g_free(tmp);
1993 return val;
1996 uint64_t kvmppc_get_clockfreq(void)
1998 return kvmppc_read_int_cpu_dt("clock-frequency");
2001 uint32_t kvmppc_get_vmx(void)
2003 return kvmppc_read_int_cpu_dt("ibm,vmx");
2006 uint32_t kvmppc_get_dfp(void)
2008 return kvmppc_read_int_cpu_dt("ibm,dfp");
2011 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2013 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2014 CPUState *cs = CPU(cpu);
2016 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2017 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2018 return 0;
2021 return 1;
2024 int kvmppc_get_hasidle(CPUPPCState *env)
2026 struct kvm_ppc_pvinfo pvinfo;
2028 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2029 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2030 return 1;
2033 return 0;
2036 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2038 uint32_t *hc = (uint32_t*)buf;
2039 struct kvm_ppc_pvinfo pvinfo;
2041 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2042 memcpy(buf, pvinfo.hcall, buf_len);
2043 return 0;
2047 * Fallback to always fail hypercalls regardless of endianness:
2049 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2050 * li r3, -1
2051 * b .+8 (becomes nop in wrong endian)
2052 * bswap32(li r3, -1)
2055 hc[0] = cpu_to_be32(0x08000048);
2056 hc[1] = cpu_to_be32(0x3860ffff);
2057 hc[2] = cpu_to_be32(0x48000008);
2058 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2060 return 1;
2063 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2065 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2068 void kvmppc_enable_logical_ci_hcalls(void)
2071 * FIXME: it would be nice if we could detect the cases where
2072 * we're using a device which requires the in kernel
2073 * implementation of these hcalls, but the kernel lacks them and
2074 * produce a warning.
2076 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2077 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2080 void kvmppc_enable_set_mode_hcall(void)
2082 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2085 void kvmppc_enable_clear_ref_mod_hcalls(void)
2087 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2088 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2091 void kvmppc_set_papr(PowerPCCPU *cpu)
2093 CPUState *cs = CPU(cpu);
2094 int ret;
2096 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2097 if (ret) {
2098 error_report("This vCPU type or KVM version does not support PAPR");
2099 exit(1);
2102 /* Update the capability flag so we sync the right information
2103 * with kvm */
2104 cap_papr = 1;
2107 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2109 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2112 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2114 CPUState *cs = CPU(cpu);
2115 int ret;
2117 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2118 if (ret && mpic_proxy) {
2119 error_report("This KVM version does not support EPR");
2120 exit(1);
2124 int kvmppc_smt_threads(void)
2126 return cap_ppc_smt ? cap_ppc_smt : 1;
#ifdef TARGET_PPC64
off_t kvmppc_alloc_rma(void **rma)
{
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    }

    return size;
}

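/*
 * Clamp the requested RMA size to what the host page sizes and the
 * guest hash page table can cover. The '- 7' presumably reflects that
 * a 2^hash_shift byte HPT holds 2^(hash_shift - 7) 128-byte HPTE
 * groups, one per mappable RMA page.
 */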
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = qemu_getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
#endif

bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}

int kvmppc_spapr_enable_inkernel_multitce(void)
{
    int ret;

    ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                            H_PUT_TCE_INDIRECT, 1);
    if (!ret) {
        ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
                                H_STUFF_TCE, 1);
    }

    return ret;
}

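/*
 * Create an in-kernel TCE (IOMMU translation) table and mmap() it into
 * QEMU's address space. On success the mapped table is returned and
 * *pfd holds the kernel fd backing it; on any failure NULL is returned
 * so the caller can fall back to a userspace-managed table.
 */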
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
                              uint64_t bus_offset, uint32_t nb_table,
                              int *pfd, bool need_vfio)
{
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    if (cap_spapr_tce_64) {
        struct kvm_create_spapr_tce_64 args = {
            .liobn = liobn,
            .page_shift = page_shift,
            .offset = bus_offset >> page_shift,
            .size = nb_table,
            .flags = 0
        };
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
        if (fd < 0) {
            fprintf(stderr,
                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else if (cap_spapr_tce) {
        uint64_t window_size = (uint64_t) nb_table << page_shift;
        struct kvm_create_spapr_tce args = {
            .liobn = liobn,
            .window_size = window_size,
        };
        if ((window_size != args.window_size) || bus_offset) {
            return NULL;
        }
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        if (fd < 0) {
            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else {
        return NULL;
    }

    len = nb_table * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

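/*
 * (Re)allocate the guest hash page table. Returns the shift (log2 of
 * the size) of a table allocated by the kernel, 0 if QEMU itself must
 * allocate the table, or a negative error value.
 */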
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

#if defined(TARGET_PPC64)
    pcc->radix_page_info = kvm_get_radix_page_info();

    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
        /*
         * POWER9 DD1 has some bugs which make it not really ISA 3.00
         * compliant. More importantly, advertising ISA 3.00
         * architected mode may prevent guests from activating
         * necessary DD1 workarounds.
         */
        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
    }
#endif /* defined(TARGET_PPC64) */
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

bool kvmppc_has_cap_mmu_radix(void)
{
    return cap_mmu_radix;
}

bool kvmppc_has_cap_mmu_hash_v3(void)
{
    return cap_mmu_hash_v3;
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}

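/*
 * Register the "host" CPU type: a subclass of the CPU class matching
 * the host PVR, plus (on ppc64) the matching sPAPR core type, and point
 * the generic family alias (e.g. "POWER8") at it as well.
 */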
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

#if defined(TARGET_PPC64)
    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
    type_info.parent = TYPE_SPAPR_CPU_CORE;
    type_info.instance_size = sizeof(sPAPRCPUCore);
    type_info.instance_init = NULL;
    type_info.class_init = spapr_cpu_core_class_init;
    type_info.class_data = (void *) "host";
    type_register(&type_info);
    g_free((void *)type_info.name);
#endif

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
            if (suffix) {
                *suffix = 0;
            }
            ppc_cpu_aliases[i].oc = oc;
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

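/*
 * Obtain a file descriptor for streaming the guest hash page table out
 * of (or, with write == true, back into) the kernel, starting at index
 * 0. Returns the fd, or a negative value if the capability is missing.
 */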
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}

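/*
 * Stream the guest hash page table from the kernel HTAB fd into the
 * migration stream. Each chunk read from the fd is a
 * kvm_get_htab_header (index, n_valid, n_invalid) followed by n_valid
 * HPTEs of HASH_PTE_SIZE_64 bytes, and is forwarded to the QEMUFile in
 * the same field order. Returns 1 once the fd reports end of data, 0
 * if the max_ns time budget ran out first, or a negative value on read
 * error.
 */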
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

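/*
 * Read one hash table chunk back out of the migration stream and push
 * it to the kernel through the HTAB fd, reconstructing the
 * kvm_get_htab_header + HPTE layout that kvmppc_save_htab() produced.
 */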
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

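/*
 * Read n HPTEs starting at index ptex from the kernel hash table into
 * hptes[]. Entries the kernel reports as invalid are zero-filled, so
 * the caller always gets n contiguous entries back.
 */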
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    struct kvm_get_htab_fd ghf = {
        .flags = 0,
        .start_index = ptex,
    };
    int fd, rc;
    int i;

    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_read_hptes: Unable to open HPT fd");
    }

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
            }

            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
            i += hdr->n_valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += hdr->n_invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

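/*
 * Rewrite a single HPTE: open an HTAB fd and push one chunk containing
 * the entry at index ptex, with the PTE words converted to big-endian
 * as the hash table format requires.
 */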
void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct kvm_get_htab_fd ghf;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_write_hpte: Unable to open HPT fd");
    }

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }

    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}

void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fallback on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}

int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}

int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}

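/*
 * Helpers for pivoting to a newly allocated hash page table: point each
 * vCPU's SDR1 at the new table and push the updated book3s sregs back
 * into KVM (as noted below, only PR KVM actually needs this).
 */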
static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
{
    target_ulong sdr1 = arg.target_ptr;
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;

    /* This is just for the benefit of PR KVM */
    cpu_synchronize_state(cs);
    env->spr[SPR_SDR1] = sdr1;
    if (kvmppc_put_books_sregs(cpu) < 0) {
        error_report("Unable to update SDR1 in KVM");
        exit(1);
    }
}

void kvmppc_update_sdr1(target_ulong sdr1)
{
    CPUState *cs;

    CPU_FOREACH(cs) {
        run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));