target/ppc/kvm.c (qemu.git)
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
53 //#define DEBUG_KVM
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60 do { } while (0)
61 #endif
63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66 KVM_CAP_LAST_INFO
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
95 static uint32_t debug_inst_opcode;
97 /* XXX We have a race condition where we actually have a level triggered
98 * interrupt, but the infrastructure can't expose that yet, so the guest
99 * takes but ignores it, goes to sleep and never gets notified that there's
100 * still an interrupt pending.
102 * As a quick workaround, let's just wake up again 20 ms after we injected
103 * an interrupt. That way we can ensure that we're always reinjecting
104 * interrupts in case the guest swallowed them.
106 static QEMUTimer *idle_timer;
108 static void kvm_kick_cpu(void *opaque)
110 PowerPCCPU *cpu = opaque;
112 qemu_cpu_kick(CPU(cpu));
115 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
116 * should only be used for fallback tests - generally we should use
117 * explicit capabilities for the features we want, rather than
118 * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
121 /* Assume KVM-PR if the GET_PVINFO capability is available */
122 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 int kvm_arch_init(MachineState *ms, KVMState *s)
130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143 /* Note: we don't set cap_papr here, because this capability is
144 * only activated after this by kvmppc_set_papr() */
145 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152 kvmppc_get_cpu_characteristics(s);
154 * Note: setting it to false because there is no such capability
155 * in KVM at this moment.
157 * TODO: call kvm_vm_check_extension() with the right capability
158 * after the kernel starts implementing it.*/
159 cap_ppc_pvr_compat = false;
161 if (!cap_interrupt_level) {
162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163 "VM to stall at times!\n");
166 kvm_ppc_register_host_cpu_type(ms);
168 return 0;
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
173 return 0;
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
178 CPUPPCState *cenv = &cpu->env;
179 CPUState *cs = CPU(cpu);
180 struct kvm_sregs sregs;
181 int ret;
183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184 /* What we're really trying to say is "if we're on BookE, we use
185 the native PVR for now". This is the only sane way to check
186 it though, so we potentially mislead users into thinking they can
187 run BookE guests on BookS. Let's hope nobody dares enough :) */
188 return 0;
189 } else {
190 if (!cap_segstate) {
191 fprintf(stderr, "kvm error: missing PVR setting capability\n");
192 return -ENOSYS;
196 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197 if (ret) {
198 return ret;
201 sregs.pvr = cenv->spr[SPR_PVR];
202 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
210 struct kvm_book3e_206_tlb_params params = {};
211 struct kvm_config_tlb cfg = {};
212 unsigned int entries = 0;
213 int ret, i;
215 if (!kvm_enabled() ||
216 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217 return 0;
220 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
222 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223 params.tlb_sizes[i] = booke206_tlb_size(env, i);
224 params.tlb_ways[i] = booke206_tlb_ways(env, i);
225 entries += params.tlb_sizes[i];
228 assert(entries == env->nb_tlb);
229 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
231 env->tlb_dirty = true;
233 cfg.array = (uintptr_t)env->tlb.tlbm;
234 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235 cfg.params = (uintptr_t)&params;
236 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
238 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239 if (ret < 0) {
240 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241 __func__, strerror(-ret));
242 return ret;
245 env->kvm_sw_tlb = true;
246 return 0;
250 #if defined(TARGET_PPC64)
251 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
253 int ret;
255 assert(kvm_state != NULL);
257 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
258 error_setg(errp, "KVM doesn't expose the MMU features it supports");
259 error_append_hint(errp, "Consider switching to a newer KVM\n");
260 return;
263 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
264 if (ret == 0) {
265 return;
268 error_setg_errno(errp, -ret,
269 "KVM failed to provide the MMU features it supports");
272 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
274 KVMState *s = KVM_STATE(current_machine->accelerator);
275 struct ppc_radix_page_info *radix_page_info;
276 struct kvm_ppc_rmmu_info rmmu_info;
277 int i;
279 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
280 return NULL;
282 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
283 return NULL;
285 radix_page_info = g_malloc0(sizeof(*radix_page_info));
286 radix_page_info->count = 0;
287 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
288 if (rmmu_info.ap_encodings[i]) {
289 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
290 radix_page_info->count++;
293 return radix_page_info;
296 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
297 bool radix, bool gtse,
298 uint64_t proc_tbl)
300 CPUState *cs = CPU(cpu);
301 int ret;
302 uint64_t flags = 0;
303 struct kvm_ppc_mmuv3_cfg cfg = {
304 .process_table = proc_tbl,
307 if (radix) {
308 flags |= KVM_PPC_MMUV3_RADIX;
310 if (gtse) {
311 flags |= KVM_PPC_MMUV3_GTSE;
313 cfg.flags = flags;
314 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
315 switch (ret) {
316 case 0:
317 return H_SUCCESS;
318 case -EINVAL:
319 return H_PARAMETER;
320 case -ENODEV:
321 return H_NOT_AVAILABLE;
322 default:
323 return H_HARDWARE;
327 bool kvmppc_hpt_needs_host_contiguous_pages(void)
329 static struct kvm_ppc_smmu_info smmu_info;
331 if (!kvm_enabled()) {
332 return false;
335 kvm_get_smmu_info(&smmu_info, &error_fatal);
336 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
339 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
341 struct kvm_ppc_smmu_info smmu_info;
342 int iq, ik, jq, jk;
343 Error *local_err = NULL;
345 /* For now, we only have anything to check on hash64 MMUs */
346 if (!cpu->hash64_opts || !kvm_enabled()) {
347 return;
350 kvm_get_smmu_info(&smmu_info, &local_err);
351 if (local_err) {
352 error_propagate(errp, local_err);
353 return;
356 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
357 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
358 error_setg(errp,
359 "KVM does not support 1TiB segments which guest expects");
360 return;
363 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
364 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
365 smmu_info.slb_size, cpu->hash64_opts->slb_size);
366 return;
370 * Verify that every pagesize supported by the cpu model is
371 * supported by KVM with the same encodings
373 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
374 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
375 struct kvm_ppc_one_seg_page_size *ksps;
377 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
378 if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
379 break;
382 if (ik >= ARRAY_SIZE(smmu_info.sps)) {
383 error_setg(errp, "KVM doesn't support base page shift %u",
384 qsps->page_shift);
385 return;
388 ksps = &smmu_info.sps[ik];
389 if (ksps->slb_enc != qsps->slb_enc) {
390 error_setg(errp,
391 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
392 ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
393 return;
396 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
397 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
398 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
399 break;
403 if (jk >= ARRAY_SIZE(ksps->enc)) {
404 error_setg(errp, "KVM doesn't support page shift %u/%u",
405 qsps->enc[jq].page_shift, qsps->page_shift);
406 return;
408 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
409 error_setg(errp,
410 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
411 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
412 qsps->page_shift, qsps->enc[jq].pte_enc);
413 return;
418 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
419 /* Mostly, which guest page sizes we can use is determined by the
420 * host pages used to map guest RAM, which is handled in the
421 * platform code. Cache-Inhibited largepages (64k) however are
422 * used for I/O, so if they're mapped to the host at all it
423 * will be a normal mapping, not a special hugepage one used
424 * for RAM. */
425 if (getpagesize() < 0x10000) {
426 error_setg(errp,
427 "KVM can't supply 64kiB CI pages, which guest expects");
431 #endif /* !defined (TARGET_PPC64) */
433 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
435 return POWERPC_CPU(cpu)->vcpu_id;
438 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
439 * book3s supports only 1 watchpoint, so array size
440 * of 4 is sufficient for now.
442 #define MAX_HW_BKPTS 4
444 static struct HWBreakpoint {
445 target_ulong addr;
446 int type;
447 } hw_debug_points[MAX_HW_BKPTS];
449 static CPUWatchpoint hw_watchpoint;
451 /* By default, no breakpoints or watchpoints are supported */
452 static int max_hw_breakpoint;
453 static int max_hw_watchpoint;
454 static int nb_hw_breakpoint;
455 static int nb_hw_watchpoint;
457 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
459 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
460 max_hw_breakpoint = 2;
461 max_hw_watchpoint = 2;
464 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
465 fprintf(stderr, "Error initializing h/w breakpoints\n");
466 return;
470 int kvm_arch_init_vcpu(CPUState *cs)
472 PowerPCCPU *cpu = POWERPC_CPU(cs);
473 CPUPPCState *cenv = &cpu->env;
474 int ret;
476 /* Synchronize sregs with kvm */
477 ret = kvm_arch_sync_sregs(cpu);
478 if (ret) {
479 if (ret == -EINVAL) {
480 error_report("Register sync failed... If you're using kvm-hv.ko,"
481 " only \"-cpu host\" is possible");
483 return ret;
486 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
488 switch (cenv->mmu_model) {
489 case POWERPC_MMU_BOOKE206:
490 /* This target supports access to KVM's guest TLB */
491 ret = kvm_booke206_tlb_init(cpu);
492 break;
493 case POWERPC_MMU_2_07:
494 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
495 /* KVM-HV has transactional memory on POWER8 also without the
496 * KVM_CAP_PPC_HTM extension, so enable it here instead as
497 * long as it's available to userspace on the host. */
498 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
499 cap_htm = true;
502 break;
503 default:
504 break;
507 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
508 kvmppc_hw_debug_points_init(cenv);
510 return ret;
513 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
515 CPUPPCState *env = &cpu->env;
516 CPUState *cs = CPU(cpu);
517 struct kvm_dirty_tlb dirty_tlb;
518 unsigned char *bitmap;
519 int ret;
521 if (!env->kvm_sw_tlb) {
522 return;
525 bitmap = g_malloc((env->nb_tlb + 7) / 8);
526 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
528 dirty_tlb.bitmap = (uintptr_t)bitmap;
529 dirty_tlb.num_dirty = env->nb_tlb;
531 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
532 if (ret) {
533 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
534 __func__, strerror(-ret));
537 g_free(bitmap);
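/* Read a single SPR from KVM via the ONE_REG interface and store it in env->spr[spr]. */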
540 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
542 PowerPCCPU *cpu = POWERPC_CPU(cs);
543 CPUPPCState *env = &cpu->env;
544 union {
545 uint32_t u32;
546 uint64_t u64;
547 } val;
548 struct kvm_one_reg reg = {
549 .id = id,
550 .addr = (uintptr_t) &val,
552 int ret;
554 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
555 if (ret != 0) {
556 trace_kvm_failed_spr_get(spr, strerror(errno));
557 } else {
558 switch (id & KVM_REG_SIZE_MASK) {
559 case KVM_REG_SIZE_U32:
560 env->spr[spr] = val.u32;
561 break;
563 case KVM_REG_SIZE_U64:
564 env->spr[spr] = val.u64;
565 break;
567 default:
568 /* Don't handle this size yet */
569 abort();
574 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
576 PowerPCCPU *cpu = POWERPC_CPU(cs);
577 CPUPPCState *env = &cpu->env;
578 union {
579 uint32_t u32;
580 uint64_t u64;
581 } val;
582 struct kvm_one_reg reg = {
583 .id = id,
584 .addr = (uintptr_t) &val,
586 int ret;
588 switch (id & KVM_REG_SIZE_MASK) {
589 case KVM_REG_SIZE_U32:
590 val.u32 = env->spr[spr];
591 break;
593 case KVM_REG_SIZE_U64:
594 val.u64 = env->spr[spr];
595 break;
597 default:
598 /* Don't handle this size yet */
599 abort();
602 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
603 if (ret != 0) {
604 trace_kvm_failed_spr_set(spr, strerror(errno));
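/* Push the guest floating point, VSX and Altivec register state from env into KVM. */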
608 static int kvm_put_fp(CPUState *cs)
610 PowerPCCPU *cpu = POWERPC_CPU(cs);
611 CPUPPCState *env = &cpu->env;
612 struct kvm_one_reg reg;
613 int i;
614 int ret;
616 if (env->insns_flags & PPC_FLOAT) {
617 uint64_t fpscr = env->fpscr;
618 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
620 reg.id = KVM_REG_PPC_FPSCR;
621 reg.addr = (uintptr_t)&fpscr;
622 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
623 if (ret < 0) {
624 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
625 return ret;
628 for (i = 0; i < 32; i++) {
629 uint64_t vsr[2];
631 #ifdef HOST_WORDS_BIGENDIAN
632 vsr[0] = float64_val(env->fpr[i]);
633 vsr[1] = env->vsr[i];
634 #else
635 vsr[0] = env->vsr[i];
636 vsr[1] = float64_val(env->fpr[i]);
637 #endif
638 reg.addr = (uintptr_t) &vsr;
639 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
641 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
642 if (ret < 0) {
643 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
644 i, strerror(errno));
645 return ret;
650 if (env->insns_flags & PPC_ALTIVEC) {
651 reg.id = KVM_REG_PPC_VSCR;
652 reg.addr = (uintptr_t)&env->vscr;
653 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
654 if (ret < 0) {
655 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
656 return ret;
659 for (i = 0; i < 32; i++) {
660 reg.id = KVM_REG_PPC_VR(i);
661 reg.addr = (uintptr_t)&env->avr[i];
662 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
663 if (ret < 0) {
664 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
665 return ret;
670 return 0;
673 static int kvm_get_fp(CPUState *cs)
675 PowerPCCPU *cpu = POWERPC_CPU(cs);
676 CPUPPCState *env = &cpu->env;
677 struct kvm_one_reg reg;
678 int i;
679 int ret;
681 if (env->insns_flags & PPC_FLOAT) {
682 uint64_t fpscr;
683 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
685 reg.id = KVM_REG_PPC_FPSCR;
686 reg.addr = (uintptr_t)&fpscr;
687 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
688 if (ret < 0) {
689 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
690 return ret;
691 } else {
692 env->fpscr = fpscr;
695 for (i = 0; i < 32; i++) {
696 uint64_t vsr[2];
698 reg.addr = (uintptr_t) &vsr;
699 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
701 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
702 if (ret < 0) {
703 DPRINTF("Unable to get %s%d from KVM: %s\n",
704 vsx ? "VSR" : "FPR", i, strerror(errno));
705 return ret;
706 } else {
707 #ifdef HOST_WORDS_BIGENDIAN
708 env->fpr[i] = vsr[0];
709 if (vsx) {
710 env->vsr[i] = vsr[1];
712 #else
713 env->fpr[i] = vsr[1];
714 if (vsx) {
715 env->vsr[i] = vsr[0];
717 #endif
722 if (env->insns_flags & PPC_ALTIVEC) {
723 reg.id = KVM_REG_PPC_VSCR;
724 reg.addr = (uintptr_t)&env->vscr;
725 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
726 if (ret < 0) {
727 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
728 return ret;
731 for (i = 0; i < 32; i++) {
732 reg.id = KVM_REG_PPC_VR(i);
733 reg.addr = (uintptr_t)&env->avr[i];
734 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to get VR%d from KVM: %s\n",
737 i, strerror(errno));
738 return ret;
743 return 0;
746 #if defined(TARGET_PPC64)
747 static int kvm_get_vpa(CPUState *cs)
749 PowerPCCPU *cpu = POWERPC_CPU(cs);
750 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
751 struct kvm_one_reg reg;
752 int ret;
754 reg.id = KVM_REG_PPC_VPA_ADDR;
755 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
756 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
757 if (ret < 0) {
758 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
759 return ret;
762 assert((uintptr_t)&spapr_cpu->slb_shadow_size
763 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
764 reg.id = KVM_REG_PPC_VPA_SLB;
765 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
766 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
767 if (ret < 0) {
768 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
769 strerror(errno));
770 return ret;
773 assert((uintptr_t)&spapr_cpu->dtl_size
774 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
775 reg.id = KVM_REG_PPC_VPA_DTL;
776 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
777 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
778 if (ret < 0) {
779 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
780 strerror(errno));
781 return ret;
784 return 0;
787 static int kvm_put_vpa(CPUState *cs)
789 PowerPCCPU *cpu = POWERPC_CPU(cs);
790 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
791 struct kvm_one_reg reg;
792 int ret;
794 /* SLB shadow or DTL can't be registered unless a master VPA is
795 * registered. That means when restoring state, if a VPA *is*
796 * registered, we need to set that up first. If not, we need to
797 * deregister the others before deregistering the master VPA */
798 assert(spapr_cpu->vpa_addr
799 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
801 if (spapr_cpu->vpa_addr) {
802 reg.id = KVM_REG_PPC_VPA_ADDR;
803 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
804 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
805 if (ret < 0) {
806 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
807 return ret;
811 assert((uintptr_t)&spapr_cpu->slb_shadow_size
812 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
813 reg.id = KVM_REG_PPC_VPA_SLB;
814 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
815 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
816 if (ret < 0) {
817 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
818 return ret;
821 assert((uintptr_t)&spapr_cpu->dtl_size
822 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
823 reg.id = KVM_REG_PPC_VPA_DTL;
824 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
825 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
826 if (ret < 0) {
827 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
828 strerror(errno));
829 return ret;
832 if (!spapr_cpu->vpa_addr) {
833 reg.id = KVM_REG_PPC_VPA_ADDR;
834 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
835 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
836 if (ret < 0) {
837 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
838 return ret;
842 return 0;
844 #endif /* TARGET_PPC64 */
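/* Write the Book3S segment state (SDR1, SLB, segment registers, BATs) to KVM via KVM_SET_SREGS. */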
846 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
848 CPUPPCState *env = &cpu->env;
849 struct kvm_sregs sregs;
850 int i;
852 sregs.pvr = env->spr[SPR_PVR];
854 if (cpu->vhyp) {
855 PPCVirtualHypervisorClass *vhc =
856 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
857 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
858 } else {
859 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
862 /* Sync SLB */
863 #ifdef TARGET_PPC64
864 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
865 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
866 if (env->slb[i].esid & SLB_ESID_V) {
867 sregs.u.s.ppc64.slb[i].slbe |= i;
869 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
871 #endif
873 /* Sync SRs */
874 for (i = 0; i < 16; i++) {
875 sregs.u.s.ppc32.sr[i] = env->sr[i];
878 /* Sync BATs */
879 for (i = 0; i < 8; i++) {
880 /* Beware. We have to swap upper and lower bits here */
881 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
882 | env->DBAT[1][i];
883 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
884 | env->IBAT[1][i];
887 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
890 int kvm_arch_put_registers(CPUState *cs, int level)
892 PowerPCCPU *cpu = POWERPC_CPU(cs);
893 CPUPPCState *env = &cpu->env;
894 struct kvm_regs regs;
895 int ret;
896 int i;
898 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
899 if (ret < 0) {
900 return ret;
903 regs.ctr = env->ctr;
904 regs.lr = env->lr;
905 regs.xer = cpu_read_xer(env);
906 regs.msr = env->msr;
907 regs.pc = env->nip;
909 regs.srr0 = env->spr[SPR_SRR0];
910 regs.srr1 = env->spr[SPR_SRR1];
912 regs.sprg0 = env->spr[SPR_SPRG0];
913 regs.sprg1 = env->spr[SPR_SPRG1];
914 regs.sprg2 = env->spr[SPR_SPRG2];
915 regs.sprg3 = env->spr[SPR_SPRG3];
916 regs.sprg4 = env->spr[SPR_SPRG4];
917 regs.sprg5 = env->spr[SPR_SPRG5];
918 regs.sprg6 = env->spr[SPR_SPRG6];
919 regs.sprg7 = env->spr[SPR_SPRG7];
921 regs.pid = env->spr[SPR_BOOKE_PID];
923 for (i = 0;i < 32; i++)
924 regs.gpr[i] = env->gpr[i];
926 regs.cr = 0;
927 for (i = 0; i < 8; i++) {
928 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
931 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
932 if (ret < 0)
933 return ret;
935 kvm_put_fp(cs);
937 if (env->tlb_dirty) {
938 kvm_sw_tlb_put(cpu);
939 env->tlb_dirty = false;
942 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
943 ret = kvmppc_put_books_sregs(cpu);
944 if (ret < 0) {
945 return ret;
949 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
950 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
953 if (cap_one_reg) {
954 int i;
956 /* We deliberately ignore errors here; for kernels which have
957 * the ONE_REG calls but don't support the specific
958 * registers, there's a reasonable chance things will still
959 * work, at least until we try to migrate. */
960 for (i = 0; i < 1024; i++) {
961 uint64_t id = env->spr_cb[i].one_reg_id;
963 if (id != 0) {
964 kvm_put_one_spr(cs, id, i);
968 #ifdef TARGET_PPC64
969 if (msr_ts) {
970 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
971 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
973 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
974 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
976 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
977 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
978 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
979 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
980 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
981 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
982 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
983 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
984 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
988 if (cap_papr) {
989 if (kvm_put_vpa(cs) < 0) {
990 DPRINTF("Warning: Unable to set VPA information to KVM\n");
994 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
995 #endif /* TARGET_PPC64 */
998 return ret;
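/* Recompute a BookE exception vector from its IVOR offset and the IVPR base. */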
1001 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1003 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1006 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1008 CPUPPCState *env = &cpu->env;
1009 struct kvm_sregs sregs;
1010 int ret;
1012 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1013 if (ret < 0) {
1014 return ret;
1017 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1018 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1019 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1020 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1021 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1022 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1023 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1024 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1025 env->spr[SPR_DECR] = sregs.u.e.dec;
1026 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1027 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1028 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1031 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1032 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1033 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1034 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1035 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1036 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1039 if (sregs.u.e.features & KVM_SREGS_E_64) {
1040 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1043 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1044 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1047 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1048 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1049 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1050 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1051 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1052 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1053 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1054 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1055 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1056 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1057 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1058 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1059 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1060 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1061 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1062 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1063 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1064 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1065 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1066 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1067 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1068 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1069 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1070 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1071 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1072 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1073 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1074 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1075 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1076 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1077 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1078 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1079 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1081 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1082 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1083 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1084 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1085 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1086 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1087 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1090 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1091 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1092 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1095 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1096 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1097 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1098 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1099 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1103 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1104 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1105 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1106 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1107 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1108 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1109 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1110 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1111 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1112 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1113 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1116 if (sregs.u.e.features & KVM_SREGS_EXP) {
1117 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1120 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1121 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1122 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1125 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1126 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1127 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1128 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1130 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1131 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1132 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1136 return 0;
1139 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1141 CPUPPCState *env = &cpu->env;
1142 struct kvm_sregs sregs;
1143 int ret;
1144 int i;
1146 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1147 if (ret < 0) {
1148 return ret;
1151 if (!cpu->vhyp) {
1152 ppc_store_sdr1(env, sregs.u.s.sdr1);
1155 /* Sync SLB */
1156 #ifdef TARGET_PPC64
1158 * The packed SLB array we get from KVM_GET_SREGS only contains
1159 * information about valid entries. So we flush our internal copy
1160 * to get rid of stale ones, then put all valid SLB entries back
1161 * in.
1163 memset(env->slb, 0, sizeof(env->slb));
1164 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1165 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1166 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1168 * Only restore valid entries
1170 if (rb & SLB_ESID_V) {
1171 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1174 #endif
1176 /* Sync SRs */
1177 for (i = 0; i < 16; i++) {
1178 env->sr[i] = sregs.u.s.ppc32.sr[i];
1181 /* Sync BATs */
1182 for (i = 0; i < 8; i++) {
1183 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1184 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1185 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1186 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1189 return 0;
1192 int kvm_arch_get_registers(CPUState *cs)
1194 PowerPCCPU *cpu = POWERPC_CPU(cs);
1195 CPUPPCState *env = &cpu->env;
1196 struct kvm_regs regs;
1197 uint32_t cr;
1198 int i, ret;
1200 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1201 if (ret < 0)
1202 return ret;
1204 cr = regs.cr;
1205 for (i = 7; i >= 0; i--) {
1206 env->crf[i] = cr & 15;
1207 cr >>= 4;
1210 env->ctr = regs.ctr;
1211 env->lr = regs.lr;
1212 cpu_write_xer(env, regs.xer);
1213 env->msr = regs.msr;
1214 env->nip = regs.pc;
1216 env->spr[SPR_SRR0] = regs.srr0;
1217 env->spr[SPR_SRR1] = regs.srr1;
1219 env->spr[SPR_SPRG0] = regs.sprg0;
1220 env->spr[SPR_SPRG1] = regs.sprg1;
1221 env->spr[SPR_SPRG2] = regs.sprg2;
1222 env->spr[SPR_SPRG3] = regs.sprg3;
1223 env->spr[SPR_SPRG4] = regs.sprg4;
1224 env->spr[SPR_SPRG5] = regs.sprg5;
1225 env->spr[SPR_SPRG6] = regs.sprg6;
1226 env->spr[SPR_SPRG7] = regs.sprg7;
1228 env->spr[SPR_BOOKE_PID] = regs.pid;
1230 for (i = 0;i < 32; i++)
1231 env->gpr[i] = regs.gpr[i];
1233 kvm_get_fp(cs);
1235 if (cap_booke_sregs) {
1236 ret = kvmppc_get_booke_sregs(cpu);
1237 if (ret < 0) {
1238 return ret;
1242 if (cap_segstate) {
1243 ret = kvmppc_get_books_sregs(cpu);
1244 if (ret < 0) {
1245 return ret;
1249 if (cap_hior) {
1250 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1253 if (cap_one_reg) {
1254 int i;
1256 /* We deliberately ignore errors here; for kernels which have
1257 * the ONE_REG calls but don't support the specific
1258 * registers, there's a reasonable chance things will still
1259 * work, at least until we try to migrate. */
1260 for (i = 0; i < 1024; i++) {
1261 uint64_t id = env->spr_cb[i].one_reg_id;
1263 if (id != 0) {
1264 kvm_get_one_spr(cs, id, i);
1268 #ifdef TARGET_PPC64
1269 if (msr_ts) {
1270 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1271 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1273 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1274 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1276 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1277 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1278 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1279 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1280 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1281 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1282 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1283 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1284 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1288 if (cap_papr) {
1289 if (kvm_get_vpa(cs) < 0) {
1290 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1294 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1295 #endif
1298 return 0;
1301 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1303 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1305 if (irq != PPC_INTERRUPT_EXT) {
1306 return 0;
1309 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1310 return 0;
1313 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1315 return 0;
1318 #if defined(TARGET_PPCEMB)
1319 #define PPC_INPUT_INT PPC40x_INPUT_INT
1320 #elif defined(TARGET_PPC64)
1321 #define PPC_INPUT_INT PPC970_INPUT_INT
1322 #else
1323 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1324 #endif
1326 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1328 PowerPCCPU *cpu = POWERPC_CPU(cs);
1329 CPUPPCState *env = &cpu->env;
1330 int r;
1331 unsigned irq;
1333 qemu_mutex_lock_iothread();
1335 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1336 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1337 if (!cap_interrupt_level &&
1338 run->ready_for_interrupt_injection &&
1339 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1340 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1342 /* For now KVM disregards the 'irq' argument. However, in the
1343 * future KVM could cache it in-kernel to avoid a heavyweight exit
1344 * when reading the UIC.
1346 irq = KVM_INTERRUPT_SET;
1348 DPRINTF("injected interrupt %d\n", irq);
1349 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1350 if (r < 0) {
1351 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1354 /* Always wake up soon in case the interrupt was level based */
1355 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1356 (NANOSECONDS_PER_SECOND / 50));
1359 /* We don't know if there are more interrupts pending after this. However,
1360 * the guest will return to userspace in the course of handling this one
1361 * anyways, so we will get a chance to deliver the rest. */
1363 qemu_mutex_unlock_iothread();
1366 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1368 return MEMTXATTRS_UNSPECIFIED;
1371 int kvm_arch_process_async_events(CPUState *cs)
1373 return cs->halted;
1376 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1378 CPUState *cs = CPU(cpu);
1379 CPUPPCState *env = &cpu->env;
1381 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1382 cs->halted = 1;
1383 cs->exception_index = EXCP_HLT;
1386 return 0;
1389 /* map dcr access to existing qemu dcr emulation */
1390 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1392 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1393 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1395 return 0;
1398 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1400 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1401 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1403 return 0;
1406 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1408 /* Mixed endian case is not handled */
1409 uint32_t sc = debug_inst_opcode;
1411 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1412 sizeof(sc), 0) ||
1413 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1414 return -EINVAL;
1417 return 0;
1420 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1422 uint32_t sc;
1424 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1425 sc != debug_inst_opcode ||
1426 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1427 sizeof(sc), 1)) {
1428 return -EINVAL;
1431 return 0;
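/* Find a hardware breakpoint/watchpoint with the given address and type in hw_debug_points; returns its index, or -1 if none matches. */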
1434 static int find_hw_breakpoint(target_ulong addr, int type)
1436 int n;
1438 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1439 <= ARRAY_SIZE(hw_debug_points));
1441 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1442 if (hw_debug_points[n].addr == addr &&
1443 hw_debug_points[n].type == type) {
1444 return n;
1448 return -1;
1451 static int find_hw_watchpoint(target_ulong addr, int *flag)
1453 int n;
1455 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1456 if (n >= 0) {
1457 *flag = BP_MEM_ACCESS;
1458 return n;
1461 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1462 if (n >= 0) {
1463 *flag = BP_MEM_WRITE;
1464 return n;
1467 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1468 if (n >= 0) {
1469 *flag = BP_MEM_READ;
1470 return n;
1473 return -1;
1476 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1477 target_ulong len, int type)
1479 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1480 return -ENOBUFS;
1483 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1484 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1486 switch (type) {
1487 case GDB_BREAKPOINT_HW:
1488 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1489 return -ENOBUFS;
1492 if (find_hw_breakpoint(addr, type) >= 0) {
1493 return -EEXIST;
1496 nb_hw_breakpoint++;
1497 break;
1499 case GDB_WATCHPOINT_WRITE:
1500 case GDB_WATCHPOINT_READ:
1501 case GDB_WATCHPOINT_ACCESS:
1502 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1503 return -ENOBUFS;
1506 if (find_hw_breakpoint(addr, type) >= 0) {
1507 return -EEXIST;
1510 nb_hw_watchpoint++;
1511 break;
1513 default:
1514 return -ENOSYS;
1517 return 0;
1520 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1521 target_ulong len, int type)
1523 int n;
1525 n = find_hw_breakpoint(addr, type);
1526 if (n < 0) {
1527 return -ENOENT;
1530 switch (type) {
1531 case GDB_BREAKPOINT_HW:
1532 nb_hw_breakpoint--;
1533 break;
1535 case GDB_WATCHPOINT_WRITE:
1536 case GDB_WATCHPOINT_READ:
1537 case GDB_WATCHPOINT_ACCESS:
1538 nb_hw_watchpoint--;
1539 break;
1541 default:
1542 return -ENOSYS;
1544 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1546 return 0;
1549 void kvm_arch_remove_all_hw_breakpoints(void)
1551 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1554 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1556 int n;
1558 /* Software Breakpoint updates */
1559 if (kvm_sw_breakpoints_active(cs)) {
1560 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1563 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1564 <= ARRAY_SIZE(hw_debug_points));
1565 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1567 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1568 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1569 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1570 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1571 switch (hw_debug_points[n].type) {
1572 case GDB_BREAKPOINT_HW:
1573 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1574 break;
1575 case GDB_WATCHPOINT_WRITE:
1576 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1577 break;
1578 case GDB_WATCHPOINT_READ:
1579 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1580 break;
1581 case GDB_WATCHPOINT_ACCESS:
1582 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1583 KVMPPC_DEBUG_WATCH_READ;
1584 break;
1585 default:
1586 cpu_abort(cs, "Unsupported breakpoint type\n");
1588 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1593 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1595 CPUState *cs = CPU(cpu);
1596 CPUPPCState *env = &cpu->env;
1597 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1598 int handle = 0;
1599 int n;
1600 int flag = 0;
1602 if (cs->singlestep_enabled) {
1603 handle = 1;
1604 } else if (arch_info->status) {
1605 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1606 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1607 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1608 if (n >= 0) {
1609 handle = 1;
1611 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1612 KVMPPC_DEBUG_WATCH_WRITE)) {
1613 n = find_hw_watchpoint(arch_info->address, &flag);
1614 if (n >= 0) {
1615 handle = 1;
1616 cs->watchpoint_hit = &hw_watchpoint;
1617 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1618 hw_watchpoint.flags = flag;
1622 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1623 handle = 1;
1624 } else {
1625 /* QEMU is not able to handle this debug exception, so inject a
1626 * program exception into the guest;
1627 * yes, a program exception, NOT a debug exception !!
1628 * When QEMU is using the debug resources, the debug exception must
1629 * always be delivered to QEMU. To achieve this we set MSR_DE and also
1630 * MSRP_DEP so the guest cannot change MSR_DE.
1631 * When emulating debug resources for the guest, we want the guest
1632 * to control MSR_DE (enabling/disabling the debug interrupt as needed).
1633 * Supporting both configurations is NOT possible,
1634 * so debug resources cannot be shared
1635 * between QEMU and the guest on the BookE architecture.
1636 * In the current design QEMU gets priority over the guest:
1637 * if QEMU is using the debug resources, the guest
1638 * cannot use them.
1639 * For software breakpoints QEMU uses a privileged instruction,
1640 * so there is no way we got here because the guest
1641 * set a debug exception; the only possibility is that the guest
1642 * executed a privileged / illegal instruction, and that is why we are
1643 * injecting a program interrupt.
1646 cpu_synchronize_state(cs);
1647 /* env->nip is PC, so increment this by 4 to use
1648 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1650 env->nip += 4;
1651 cs->exception_index = POWERPC_EXCP_PROGRAM;
1652 env->error_code = POWERPC_EXCP_INVAL;
1653 ppc_cpu_do_interrupt(cs);
1656 return handle;
1659 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1661 PowerPCCPU *cpu = POWERPC_CPU(cs);
1662 CPUPPCState *env = &cpu->env;
1663 int ret;
1665 qemu_mutex_lock_iothread();
1667 switch (run->exit_reason) {
1668 case KVM_EXIT_DCR:
1669 if (run->dcr.is_write) {
1670 DPRINTF("handle dcr write\n");
1671 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1672 } else {
1673 DPRINTF("handle dcr read\n");
1674 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1676 break;
1677 case KVM_EXIT_HLT:
1678 DPRINTF("handle halt\n");
1679 ret = kvmppc_handle_halt(cpu);
1680 break;
1681 #if defined(TARGET_PPC64)
1682 case KVM_EXIT_PAPR_HCALL:
1683 DPRINTF("handle PAPR hypercall\n");
1684 run->papr_hcall.ret = spapr_hypercall(cpu,
1685 run->papr_hcall.nr,
1686 run->papr_hcall.args);
1687 ret = 0;
1688 break;
1689 #endif
1690 case KVM_EXIT_EPR:
1691 DPRINTF("handle epr\n");
1692 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1693 ret = 0;
1694 break;
1695 case KVM_EXIT_WATCHDOG:
1696 DPRINTF("handle watchdog expiry\n");
1697 watchdog_perform_action();
1698 ret = 0;
1699 break;
1701 case KVM_EXIT_DEBUG:
1702 DPRINTF("handle debug exception\n");
1703 if (kvm_handle_debug(cpu, run)) {
1704 ret = EXCP_DEBUG;
1705 break;
1707 /* re-enter, this exception was guest-internal */
1708 ret = 0;
1709 break;
1711 default:
1712 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1713 ret = -1;
1714 break;
1717 qemu_mutex_unlock_iothread();
1718 return ret;
1721 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1723 CPUState *cs = CPU(cpu);
1724 uint32_t bits = tsr_bits;
1725 struct kvm_one_reg reg = {
1726 .id = KVM_REG_PPC_OR_TSR,
1727 .addr = (uintptr_t) &bits,
1730 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1733 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1736 CPUState *cs = CPU(cpu);
1737 uint32_t bits = tsr_bits;
1738 struct kvm_one_reg reg = {
1739 .id = KVM_REG_PPC_CLEAR_TSR,
1740 .addr = (uintptr_t) &bits,
1743 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1746 int kvmppc_set_tcr(PowerPCCPU *cpu)
1748 CPUState *cs = CPU(cpu);
1749 CPUPPCState *env = &cpu->env;
1750 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1752 struct kvm_one_reg reg = {
1753 .id = KVM_REG_PPC_TCR,
1754 .addr = (uintptr_t) &tcr,
1757 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1760 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1762 CPUState *cs = CPU(cpu);
1763 int ret;
1765 if (!kvm_enabled()) {
1766 return -1;
1769 if (!cap_ppc_watchdog) {
1770 printf("warning: KVM does not support watchdog");
1771 return -1;
1774 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1775 if (ret < 0) {
1776 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1777 __func__, strerror(-ret));
1778 return ret;
1781 return ret;
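/* Scan /proc/cpuinfo for a line starting with 'field' and copy that line into 'value'; returns 0 on success, -1 otherwise. */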
1784 static int read_cpuinfo(const char *field, char *value, int len)
1786 FILE *f;
1787 int ret = -1;
1788 int field_len = strlen(field);
1789 char line[512];
1791 f = fopen("/proc/cpuinfo", "r");
1792 if (!f) {
1793 return -1;
1796 do {
1797 if (!fgets(line, sizeof(line), f)) {
1798 break;
1800 if (!strncmp(line, field, field_len)) {
1801 pstrcpy(value, len, line);
1802 ret = 0;
1803 break;
1805 } while(*line);
1807 fclose(f);
1809 return ret;
1812 uint32_t kvmppc_get_tbfreq(void)
1814 char line[512];
1815 char *ns;
1816 uint32_t retval = NANOSECONDS_PER_SECOND;
1818 if (read_cpuinfo("timebase", line, sizeof(line))) {
1819 return retval;
1822 if (!(ns = strchr(line, ':'))) {
1823 return retval;
1826 ns++;
1828 return atoi(ns);
1831 bool kvmppc_get_host_serial(char **value)
1833 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1834 NULL);
1837 bool kvmppc_get_host_model(char **value)
1839 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1842 /* Try to find a device tree node for a CPU with clock-frequency property */
1843 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1845 struct dirent *dirp;
1846 DIR *dp;
1848 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1849 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1850 return -1;
1853 buf[0] = '\0';
1854 while ((dirp = readdir(dp)) != NULL) {
1855 FILE *f;
1856 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1857 dirp->d_name);
1858 f = fopen(buf, "r");
1859 if (f) {
1860 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1861 fclose(f);
1862 break;
1864 buf[0] = '\0';
1866 closedir(dp);
1867 if (buf[0] == '\0') {
1868 printf("Unknown host!\n");
1869 return -1;
1872 return 0;
1875 static uint64_t kvmppc_read_int_dt(const char *filename)
1877 union {
1878 uint32_t v32;
1879 uint64_t v64;
1880 } u;
1881 FILE *f;
1882 int len;
1884 f = fopen(filename, "rb");
1885 if (!f) {
1886 return -1;
1889 len = fread(&u, 1, sizeof(u), f);
1890 fclose(f);
1891 switch (len) {
1892 case 4:
1893 /* property is a 32-bit quantity */
1894 return be32_to_cpu(u.v32);
1895 case 8:
1896 return be64_to_cpu(u.v64);
1899 return 0;
1902 /* Read a CPU node property from the host device tree that's a single
1903 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1904 * (can't find or open the property, or doesn't understand the
1905 * format) */
1906 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1908 char buf[PATH_MAX], *tmp;
1909 uint64_t val;
1911 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1912 return -1;
1915 tmp = g_strdup_printf("%s/%s", buf, propname);
1916 val = kvmppc_read_int_dt(tmp);
1917 g_free(tmp);
1919 return val;
1922 uint64_t kvmppc_get_clockfreq(void)
1924 return kvmppc_read_int_cpu_dt("clock-frequency");
1927 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1929 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1930 CPUState *cs = CPU(cpu);
1932 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1933 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1934 return 0;
1937 return 1;
1940 int kvmppc_get_hasidle(CPUPPCState *env)
1942 struct kvm_ppc_pvinfo pvinfo;
1944 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1945 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1946 return 1;
1949 return 0;
1952 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1954 uint32_t *hc = (uint32_t*)buf;
1955 struct kvm_ppc_pvinfo pvinfo;
1957 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1958 memcpy(buf, pvinfo.hcall, buf_len);
1959 return 0;
1963 * Fallback to always fail hypercalls regardless of endianness:
1965 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1966 * li r3, -1
1967 * b .+8 (becomes nop in wrong endian)
1968 * bswap32(li r3, -1)
1971 hc[0] = cpu_to_be32(0x08000048);
1972 hc[1] = cpu_to_be32(0x3860ffff);
1973 hc[2] = cpu_to_be32(0x48000008);
1974 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1976 return 1;
1979 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1981 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1984 void kvmppc_enable_logical_ci_hcalls(void)
1987 * FIXME: it would be nice if we could detect the cases where
1988 * we're using a device which requires the in kernel
1989 * implementation of these hcalls, but the kernel lacks them and
1990 * produce a warning.
1992 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1993 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1996 void kvmppc_enable_set_mode_hcall(void)
1998 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2001 void kvmppc_enable_clear_ref_mod_hcalls(void)
2003 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2004 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2007 void kvmppc_set_papr(PowerPCCPU *cpu)
2009 CPUState *cs = CPU(cpu);
2010 int ret;
2012 if (!kvm_enabled()) {
2013 return;
2016 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2017 if (ret) {
2018 error_report("This vCPU type or KVM version does not support PAPR");
2019 exit(1);
2022 /* Update the capability flag so we sync the right information
2023 * with kvm */
2024 cap_papr = 1;
2027 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2029 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2032 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2034 CPUState *cs = CPU(cpu);
2035 int ret;
2037 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2038 if (ret && mpic_proxy) {
2039 error_report("This KVM version does not support EPR");
2040 exit(1);
2044 int kvmppc_smt_threads(void)
2046 return cap_ppc_smt ? cap_ppc_smt : 1;
2049 int kvmppc_set_smt_threads(int smt)
2051 int ret;
2053 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2054 if (!ret) {
2055 cap_ppc_smt = smt;
2057 return ret;
2060 void kvmppc_hint_smt_possible(Error **errp)
2062 int i;
2063 GString *g;
2064 char *s;
2066 assert(kvm_enabled());
2067 if (cap_ppc_smt_possible) {
2068 g = g_string_new("Available VSMT modes:");
2069 for (i = 63; i >= 0; i--) {
2070 if ((1UL << i) & cap_ppc_smt_possible) {
2071 g_string_append_printf(g, " %lu", (1UL << i));
2074 s = g_string_free(g, false);
2075 error_append_hint(errp, "%s.\n", s);
2076 g_free(s);
2077 } else {
2078 error_append_hint(errp,
2079 "This KVM seems to be too old to support VSMT.\n");
2084 #ifdef TARGET_PPC64
2085 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2087 struct kvm_ppc_smmu_info info;
2088 long rampagesize, best_page_shift;
2089 int i;
2091 /* Find the largest hardware supported page size that's less than
2092 * or equal to the (logical) backing page size of guest RAM */
2093 kvm_get_smmu_info(&info, &error_fatal);
2094 rampagesize = qemu_getrampagesize();
2095 best_page_shift = 0;
2097 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2098 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2100 if (!sps->page_shift) {
2101 continue;
2104 if ((sps->page_shift > best_page_shift)
2105 && ((1UL << sps->page_shift) <= rampagesize)) {
2106 best_page_shift = sps->page_shift;
2110 return MIN(current_size,
2111 1ULL << (best_page_shift + hash_shift - 7));
2113 #endif
2115 bool kvmppc_spapr_use_multitce(void)
2117 return cap_spapr_multitce;
2120 int kvmppc_spapr_enable_inkernel_multitce(void)
2122 int ret;
2124 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2125 H_PUT_TCE_INDIRECT, 1);
2126 if (!ret) {
2127 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2128 H_STUFF_TCE, 1);
2131 return ret;
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
                              uint64_t bus_offset, uint32_t nb_table,
                              int *pfd, bool need_vfio)
{
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    if (cap_spapr_tce_64) {
        struct kvm_create_spapr_tce_64 args = {
            .liobn = liobn,
            .page_shift = page_shift,
            .offset = bus_offset >> page_shift,
            .size = nb_table,
            .flags = 0
        };
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
        if (fd < 0) {
            fprintf(stderr,
                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else if (cap_spapr_tce) {
        uint64_t window_size = (uint64_t) nb_table << page_shift;
        struct kvm_create_spapr_tce args = {
            .liobn = liobn,
            .window_size = window_size,
        };
        if ((window_size != args.window_size) || bus_offset) {
            return NULL;
        }
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        if (fd < 0) {
            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else {
        return NULL;
    }

    len = nb_table * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
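
/*
 * (Re)allocate the guest hash page table.  Returns 0 if the caller
 * (QEMU) should allocate the HPT itself, a positive value (the log2 of
 * the HPT size) if the kernel owns the table, or a negative errno on
 * failure.
 */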
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
    alter_insns(&pcc->insns_flags2, PPC2_VSX,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
    alter_insns(&pcc->insns_flags2, PPC2_DFP,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

#if defined(TARGET_PPC64)
    pcc->radix_page_info = kvm_get_radix_page_info();

    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
        /*
         * POWER9 DD1 has some bugs which make it not really ISA 3.00
         * compliant.  More importantly, advertising ISA 3.00
         * architected mode may prevent guests from activating
         * necessary DD1 workarounds.
         */
        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
    }
#endif /* defined(TARGET_PPC64) */
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

bool kvmppc_has_cap_mmu_radix(void)
{
    return cap_mmu_radix;
}

bool kvmppc_has_cap_mmu_hash_v3(void)
{
    return cap_mmu_hash_v3;
}

static bool kvmppc_power8_host(void)
{
    bool ret = false;
#ifdef TARGET_PPC64
    {
        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8_BASE);
    }
#endif /* TARGET_PPC64 */
    return ret;
}
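
/*
 * The parse_cap_ppc_safe_*() helpers translate the character/behaviour
 * bits reported by KVM_PPC_GET_CPU_CHAR into the capability levels used
 * by the spapr code: 0 means vulnerable/broken, 1 means a software
 * workaround is available, and higher values mean the hardware is fixed.
 */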
static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
{
    bool l1d_thread_priv_req = !kvmppc_power8_host();

    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
        return 2;
    } else if ((!l1d_thread_priv_req ||
                c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
               (c.character & c.character_mask
                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
        return 1;
    }

    return 0;
}

static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
{
    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
        return 2;
    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
        return 1;
    }

    return 0;
}

static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
{
    if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
        return SPAPR_CAP_FIXED_CCD;
    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
        return SPAPR_CAP_FIXED_IBS;
    }

    return 0;
}
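
/*
 * Query the host's speculation-control characteristics via
 * KVM_PPC_GET_CPU_CHAR, if available, and cache the parsed results.
 * If the extension is missing or the ioctl fails, the caps stay at
 * "broken".
 */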
static void kvmppc_get_cpu_characteristics(KVMState *s)
{
    struct kvm_ppc_cpu_char c;
    int ret;

    /* Assume broken */
    cap_ppc_safe_cache = 0;
    cap_ppc_safe_bounds_check = 0;
    cap_ppc_safe_indirect_branch = 0;

    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
    if (!ret) {
        return;
    }
    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
    if (ret < 0) {
        return;
    }

    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
}

int kvmppc_get_cap_safe_cache(void)
{
    return cap_ppc_safe_cache;
}

int kvmppc_get_cap_safe_bounds_check(void)
{
    return cap_ppc_safe_bounds_check;
}

int kvmppc_get_cap_safe_indirect_branch(void)
{
    return cap_ppc_safe_indirect_branch;
}

bool kvmppc_has_cap_spapr_vfio(void)
{
    return cap_spapr_vfio;
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}

static int kvm_ppc_register_host_cpu_type(MachineState *ms)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
        /* override TCG default cpu type with 'host' cpu model */
        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
    }

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
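
/*
 * Open a file descriptor through which the guest hash page table can be
 * read (e.g. for migration) or written (for restore), starting at the
 * given HPTE index.  Returns the fd, or a negative errno with *errp set
 * if the capability or the ioctl is unavailable.
 */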
int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = index,
    };
    int ret;

    if (!cap_htab_fd) {
        error_setg(errp, "KVM version doesn't support %s the HPT",
                   write ? "writing" : "reading");
        return -ENOTSUP;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
    if (ret < 0) {
        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
                   write ? "writing" : "reading", write ? "to" : "from",
                   strerror(errno));
        return -errno;
    }

    return ret;
}
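
/*
 * Stream HPT chunks from the KVM HTAB fd into the migration stream.
 * Returns 1 once the whole table has been read (read() hit EOF), 0 if
 * the max_ns time budget expired first, or a negative value on error.
 */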
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                     HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}
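
/*
 * Read n HPTEs starting at ptex from the kernel into hptes[],
 * zero-filling any ranges the kernel reports as invalid.
 */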
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
            }

            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }
    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}

void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fallback on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}
int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}

int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}

/*
 * This is a helper function to detect a post-migration scenario in
 * which a guest, running as KVM-HV, freezes in cpu_post_load because
 * the guest kernel can't handle a PVR value other than the actual host
 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true.  The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We want to avoid, as much as possible, querying the running KVM type
 * from QEMU.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }

    if (cap_ppc_pvr_compat) {
        return false;
    }

    return !kvmppc_is_pr(cs->kvm_state);
}