[qemu/ar7.git] target/ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #include "elf.h"
50 #include "sysemu/kvm_int.h"
52 //#define DEBUG_KVM
54 #ifdef DEBUG_KVM
55 #define DPRINTF(fmt, ...) \
56 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
57 #else
58 #define DPRINTF(fmt, ...) \
59 do { } while (0)
60 #endif
62 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
65 KVM_CAP_LAST_INFO
68 static int cap_interrupt_unset = false;
69 static int cap_interrupt_level = false;
70 static int cap_segstate;
71 static int cap_booke_sregs;
72 static int cap_ppc_smt;
73 static int cap_ppc_smt_possible;
74 static int cap_spapr_tce;
75 static int cap_spapr_tce_64;
76 static int cap_spapr_multitce;
77 static int cap_spapr_vfio;
78 static int cap_hior;
79 static int cap_one_reg;
80 static int cap_epr;
81 static int cap_ppc_watchdog;
82 static int cap_papr;
83 static int cap_htab_fd;
84 static int cap_fixup_hcalls;
85 static int cap_htm; /* Hardware transactional memory support */
86 static int cap_mmu_radix;
87 static int cap_mmu_hash_v3;
88 static int cap_resize_hpt;
89 static int cap_ppc_pvr_compat;
90 static int cap_ppc_safe_cache;
91 static int cap_ppc_safe_bounds_check;
92 static int cap_ppc_safe_indirect_branch;
93 static int cap_ppc_nested_kvm_hv;
95 static uint32_t debug_inst_opcode;
97 /* XXX We have a race condition where we actually have a level triggered
98 * interrupt, but the infrastructure can't expose that yet, so the guest
99 * takes but ignores it, goes to sleep and never gets notified that there's
100 * still an interrupt pending.
102 * As a quick workaround, let's just wake up again 20 ms after we injected
103 * an interrupt. That way we can ensure that we're always reinjecting
104 * interrupts in case the guest swallowed them.
106 static QEMUTimer *idle_timer;
108 static void kvm_kick_cpu(void *opaque)
110 PowerPCCPU *cpu = opaque;
112 qemu_cpu_kick(CPU(cpu));
115 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
116 * should only be used for fallback tests - generally we should use
117 * explicit capabilities for the features we want, rather than
118 * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
121 /* Assume KVM-PR if the GET_PVINFO capability is available */
122 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 int kvm_arch_init(MachineState *ms, KVMState *s)
130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143 /* Note: we don't set cap_papr here, because this capability is
144 * only activated after this by kvmppc_set_papr() */
145 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152 kvmppc_get_cpu_characteristics(s);
153 cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
155 * Note: setting it to false because there is no such capability
156 * in KVM at this moment.
158 * TODO: call kvm_vm_check_extension() with the right capability
159 * after the kernel starts implementing it. */
160 cap_ppc_pvr_compat = false;
162 if (!cap_interrupt_level) {
163 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
164 "VM to stall at times!\n");
167 kvm_ppc_register_host_cpu_type(ms);
169 return 0;
172 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
174 return 0;
177 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
179 CPUPPCState *cenv = &cpu->env;
180 CPUState *cs = CPU(cpu);
181 struct kvm_sregs sregs;
182 int ret;
184 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
185 /* What we're really trying to say is "if we're on BookE, we use
186 the native PVR for now". This is the only sane way to check
187 it though, so we may confuse users into thinking they can run
188 BookE guests on BookS. Let's hope nobody dares enough :) */
189 return 0;
190 } else {
191 if (!cap_segstate) {
192 fprintf(stderr, "kvm error: missing PVR setting capability\n");
193 return -ENOSYS;
197 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
198 if (ret) {
199 return ret;
202 sregs.pvr = cenv->spr[SPR_PVR];
203 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
206 /* Set up a shared TLB array with KVM */
207 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
209 CPUPPCState *env = &cpu->env;
210 CPUState *cs = CPU(cpu);
211 struct kvm_book3e_206_tlb_params params = {};
212 struct kvm_config_tlb cfg = {};
213 unsigned int entries = 0;
214 int ret, i;
216 if (!kvm_enabled() ||
217 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
218 return 0;
221 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
223 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
224 params.tlb_sizes[i] = booke206_tlb_size(env, i);
225 params.tlb_ways[i] = booke206_tlb_ways(env, i);
226 entries += params.tlb_sizes[i];
229 assert(entries == env->nb_tlb);
230 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
232 env->tlb_dirty = true;
234 cfg.array = (uintptr_t)env->tlb.tlbm;
235 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
236 cfg.params = (uintptr_t)&params;
237 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
239 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
240 if (ret < 0) {
241 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
242 __func__, strerror(-ret));
243 return ret;
246 env->kvm_sw_tlb = true;
247 return 0;
251 #if defined(TARGET_PPC64)
252 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
254 int ret;
256 assert(kvm_state != NULL);
258 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
259 error_setg(errp, "KVM doesn't expose the MMU features it supports");
260 error_append_hint(errp, "Consider switching to a newer KVM\n");
261 return;
264 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
265 if (ret == 0) {
266 return;
269 error_setg_errno(errp, -ret,
270 "KVM failed to provide the MMU features it supports");
273 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
275 KVMState *s = KVM_STATE(current_machine->accelerator);
276 struct ppc_radix_page_info *radix_page_info;
277 struct kvm_ppc_rmmu_info rmmu_info;
278 int i;
280 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
281 return NULL;
283 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
284 return NULL;
286 radix_page_info = g_malloc0(sizeof(*radix_page_info));
287 radix_page_info->count = 0;
288 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
289 if (rmmu_info.ap_encodings[i]) {
290 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
291 radix_page_info->count++;
294 return radix_page_info;
297 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
298 bool radix, bool gtse,
299 uint64_t proc_tbl)
301 CPUState *cs = CPU(cpu);
302 int ret;
303 uint64_t flags = 0;
304 struct kvm_ppc_mmuv3_cfg cfg = {
305 .process_table = proc_tbl,
308 if (radix) {
309 flags |= KVM_PPC_MMUV3_RADIX;
311 if (gtse) {
312 flags |= KVM_PPC_MMUV3_GTSE;
314 cfg.flags = flags;
315 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
316 switch (ret) {
317 case 0:
318 return H_SUCCESS;
319 case -EINVAL:
320 return H_PARAMETER;
321 case -ENODEV:
322 return H_NOT_AVAILABLE;
323 default:
324 return H_HARDWARE;
328 bool kvmppc_hpt_needs_host_contiguous_pages(void)
330 static struct kvm_ppc_smmu_info smmu_info;
332 if (!kvm_enabled()) {
333 return false;
336 kvm_get_smmu_info(&smmu_info, &error_fatal);
337 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
340 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
342 struct kvm_ppc_smmu_info smmu_info;
343 int iq, ik, jq, jk;
344 Error *local_err = NULL;
346 /* For now, we only have anything to check on hash64 MMUs */
347 if (!cpu->hash64_opts || !kvm_enabled()) {
348 return;
351 kvm_get_smmu_info(&smmu_info, &local_err);
352 if (local_err) {
353 error_propagate(errp, local_err);
354 return;
357 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
358 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
359 error_setg(errp,
360 "KVM does not support 1TiB segments which guest expects");
361 return;
364 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
365 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
366 smmu_info.slb_size, cpu->hash64_opts->slb_size);
367 return;
371 * Verify that every pagesize supported by the cpu model is
372 * supported by KVM with the same encodings
374 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
375 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
376 struct kvm_ppc_one_seg_page_size *ksps;
378 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
379 if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
380 break;
383 if (ik >= ARRAY_SIZE(smmu_info.sps)) {
384 error_setg(errp, "KVM doesn't support base page shift %u",
385 qsps->page_shift);
386 return;
389 ksps = &smmu_info.sps[ik];
390 if (ksps->slb_enc != qsps->slb_enc) {
391 error_setg(errp,
392 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
393 ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
394 return;
397 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
398 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
399 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
400 break;
404 if (jk >= ARRAY_SIZE(ksps->enc)) {
405 error_setg(errp, "KVM doesn't support page shift %u/%u",
406 qsps->enc[jq].page_shift, qsps->page_shift);
407 return;
409 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
410 error_setg(errp,
411 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
412 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
413 qsps->page_shift, qsps->enc[jq].pte_enc);
414 return;
419 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
420 /* The guest page sizes we can use mostly depend on the
421 * host pages used to map guest RAM, which is handled in the
422 * platform code. Cache-Inhibited largepages (64k) however are
423 * used for I/O, so if they're mapped to the host at all it
424 * will be a normal mapping, not a special hugepage one used
425 * for RAM. */
426 if (getpagesize() < 0x10000) {
427 error_setg(errp,
428 "KVM can't supply 64kiB CI pages, which guest expects");
432 #endif /* !defined (TARGET_PPC64) */
434 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
436 return POWERPC_CPU(cpu)->vcpu_id;
439 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
440 * book3s supports only 1 watchpoint, so array size
441 * of 4 is sufficient for now.
443 #define MAX_HW_BKPTS 4
445 static struct HWBreakpoint {
446 target_ulong addr;
447 int type;
448 } hw_debug_points[MAX_HW_BKPTS];
450 static CPUWatchpoint hw_watchpoint;
452 /* By default, no breakpoints or watchpoints are supported */
453 static int max_hw_breakpoint;
454 static int max_hw_watchpoint;
455 static int nb_hw_breakpoint;
456 static int nb_hw_watchpoint;
458 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
460 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
461 max_hw_breakpoint = 2;
462 max_hw_watchpoint = 2;
465 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
466 fprintf(stderr, "Error initializing h/w breakpoints\n");
467 return;
471 int kvm_arch_init_vcpu(CPUState *cs)
473 PowerPCCPU *cpu = POWERPC_CPU(cs);
474 CPUPPCState *cenv = &cpu->env;
475 int ret;
477 /* Synchronize sregs with kvm */
478 ret = kvm_arch_sync_sregs(cpu);
479 if (ret) {
480 if (ret == -EINVAL) {
481 error_report("Register sync failed... If you're using kvm-hv.ko,"
482 " only \"-cpu host\" is possible");
484 return ret;
487 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
489 switch (cenv->mmu_model) {
490 case POWERPC_MMU_BOOKE206:
491 /* This target supports access to KVM's guest TLB */
492 ret = kvm_booke206_tlb_init(cpu);
493 break;
494 case POWERPC_MMU_2_07:
495 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
496 /* KVM-HV has transactional memory on POWER8 also without the
497 * KVM_CAP_PPC_HTM extension, so enable it here instead as
498 * long as it's available to userspace on the host. */
499 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
500 cap_htm = true;
503 break;
504 default:
505 break;
508 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
509 kvmppc_hw_debug_points_init(cenv);
511 return ret;
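/* Push the QEMU-side software TLB back to KVM by marking every entry dirty (KVM_DIRTY_TLB) */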
514 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
516 CPUPPCState *env = &cpu->env;
517 CPUState *cs = CPU(cpu);
518 struct kvm_dirty_tlb dirty_tlb;
519 unsigned char *bitmap;
520 int ret;
522 if (!env->kvm_sw_tlb) {
523 return;
526 bitmap = g_malloc((env->nb_tlb + 7) / 8);
527 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
529 dirty_tlb.bitmap = (uintptr_t)bitmap;
530 dirty_tlb.num_dirty = env->nb_tlb;
532 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
533 if (ret) {
534 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
535 __func__, strerror(-ret));
538 g_free(bitmap);
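/* Read a single SPR from KVM via the ONE_REG interface; the 32-bit or 64-bit size is encoded in the register id */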
541 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
543 PowerPCCPU *cpu = POWERPC_CPU(cs);
544 CPUPPCState *env = &cpu->env;
545 union {
546 uint32_t u32;
547 uint64_t u64;
548 } val;
549 struct kvm_one_reg reg = {
550 .id = id,
551 .addr = (uintptr_t) &val,
553 int ret;
555 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
556 if (ret != 0) {
557 trace_kvm_failed_spr_get(spr, strerror(errno));
558 } else {
559 switch (id & KVM_REG_SIZE_MASK) {
560 case KVM_REG_SIZE_U32:
561 env->spr[spr] = val.u32;
562 break;
564 case KVM_REG_SIZE_U64:
565 env->spr[spr] = val.u64;
566 break;
568 default:
569 /* Don't handle this size yet */
570 abort();
575 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
577 PowerPCCPU *cpu = POWERPC_CPU(cs);
578 CPUPPCState *env = &cpu->env;
579 union {
580 uint32_t u32;
581 uint64_t u64;
582 } val;
583 struct kvm_one_reg reg = {
584 .id = id,
585 .addr = (uintptr_t) &val,
587 int ret;
589 switch (id & KVM_REG_SIZE_MASK) {
590 case KVM_REG_SIZE_U32:
591 val.u32 = env->spr[spr];
592 break;
594 case KVM_REG_SIZE_U64:
595 val.u64 = env->spr[spr];
596 break;
598 default:
599 /* Don't handle this size yet */
600 abort();
603 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
604 if (ret != 0) {
605 trace_kvm_failed_spr_set(spr, strerror(errno));
609 static int kvm_put_fp(CPUState *cs)
611 PowerPCCPU *cpu = POWERPC_CPU(cs);
612 CPUPPCState *env = &cpu->env;
613 struct kvm_one_reg reg;
614 int i;
615 int ret;
617 if (env->insns_flags & PPC_FLOAT) {
618 uint64_t fpscr = env->fpscr;
619 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
621 reg.id = KVM_REG_PPC_FPSCR;
622 reg.addr = (uintptr_t)&fpscr;
623 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
624 if (ret < 0) {
625 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
626 return ret;
629 for (i = 0; i < 32; i++) {
630 uint64_t vsr[2];
631 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
632 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
634 #ifdef HOST_WORDS_BIGENDIAN
635 vsr[0] = float64_val(*fpr);
636 vsr[1] = *vsrl;
637 #else
638 vsr[0] = *vsrl;
639 vsr[1] = float64_val(*fpr);
640 #endif
641 reg.addr = (uintptr_t) &vsr;
642 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
644 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
645 if (ret < 0) {
646 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
647 i, strerror(errno));
648 return ret;
653 if (env->insns_flags & PPC_ALTIVEC) {
654 reg.id = KVM_REG_PPC_VSCR;
655 reg.addr = (uintptr_t)&env->vscr;
656 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
657 if (ret < 0) {
658 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
659 return ret;
662 for (i = 0; i < 32; i++) {
663 reg.id = KVM_REG_PPC_VR(i);
664 reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
665 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
666 if (ret < 0) {
667 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
668 return ret;
673 return 0;
676 static int kvm_get_fp(CPUState *cs)
678 PowerPCCPU *cpu = POWERPC_CPU(cs);
679 CPUPPCState *env = &cpu->env;
680 struct kvm_one_reg reg;
681 int i;
682 int ret;
684 if (env->insns_flags & PPC_FLOAT) {
685 uint64_t fpscr;
686 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
688 reg.id = KVM_REG_PPC_FPSCR;
689 reg.addr = (uintptr_t)&fpscr;
690 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
691 if (ret < 0) {
692 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
693 return ret;
694 } else {
695 env->fpscr = fpscr;
698 for (i = 0; i < 32; i++) {
699 uint64_t vsr[2];
700 uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
701 uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
703 reg.addr = (uintptr_t) &vsr;
704 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
706 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
707 if (ret < 0) {
708 DPRINTF("Unable to get %s%d from KVM: %s\n",
709 vsx ? "VSR" : "FPR", i, strerror(errno));
710 return ret;
711 } else {
712 #ifdef HOST_WORDS_BIGENDIAN
713 *fpr = vsr[0];
714 if (vsx) {
715 *vsrl = vsr[1];
717 #else
718 *fpr = vsr[1];
719 if (vsx) {
720 *vsrl = vsr[0];
722 #endif
727 if (env->insns_flags & PPC_ALTIVEC) {
728 reg.id = KVM_REG_PPC_VSCR;
729 reg.addr = (uintptr_t)&env->vscr;
730 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
731 if (ret < 0) {
732 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
733 return ret;
736 for (i = 0; i < 32; i++) {
737 reg.id = KVM_REG_PPC_VR(i);
738 reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
739 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
740 if (ret < 0) {
741 DPRINTF("Unable to get VR%d from KVM: %s\n",
742 i, strerror(errno));
743 return ret;
748 return 0;
751 #if defined(TARGET_PPC64)
752 static int kvm_get_vpa(CPUState *cs)
754 PowerPCCPU *cpu = POWERPC_CPU(cs);
755 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
756 struct kvm_one_reg reg;
757 int ret;
759 reg.id = KVM_REG_PPC_VPA_ADDR;
760 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
761 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
762 if (ret < 0) {
763 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
764 return ret;
767 assert((uintptr_t)&spapr_cpu->slb_shadow_size
768 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
769 reg.id = KVM_REG_PPC_VPA_SLB;
770 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
771 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
772 if (ret < 0) {
773 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
774 strerror(errno));
775 return ret;
778 assert((uintptr_t)&spapr_cpu->dtl_size
779 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
780 reg.id = KVM_REG_PPC_VPA_DTL;
781 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
782 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
783 if (ret < 0) {
784 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
785 strerror(errno));
786 return ret;
789 return 0;
792 static int kvm_put_vpa(CPUState *cs)
794 PowerPCCPU *cpu = POWERPC_CPU(cs);
795 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
796 struct kvm_one_reg reg;
797 int ret;
799 /* SLB shadow or DTL can't be registered unless a master VPA is
800 * registered. That means when restoring state, if a VPA *is*
801 * registered, we need to set that up first. If not, we need to
802 * deregister the others before deregistering the master VPA */
803 assert(spapr_cpu->vpa_addr
804 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
806 if (spapr_cpu->vpa_addr) {
807 reg.id = KVM_REG_PPC_VPA_ADDR;
808 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
809 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
810 if (ret < 0) {
811 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
812 return ret;
816 assert((uintptr_t)&spapr_cpu->slb_shadow_size
817 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
818 reg.id = KVM_REG_PPC_VPA_SLB;
819 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
820 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
821 if (ret < 0) {
822 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
823 return ret;
826 assert((uintptr_t)&spapr_cpu->dtl_size
827 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
828 reg.id = KVM_REG_PPC_VPA_DTL;
829 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
830 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
831 if (ret < 0) {
832 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
833 strerror(errno));
834 return ret;
837 if (!spapr_cpu->vpa_addr) {
838 reg.id = KVM_REG_PPC_VPA_ADDR;
839 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
840 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
841 if (ret < 0) {
842 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
843 return ret;
847 return 0;
849 #endif /* TARGET_PPC64 */
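/* Push the Book3S segment state (SDR1, SLB, segment registers, BATs) to KVM via KVM_SET_SREGS */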
851 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
853 CPUPPCState *env = &cpu->env;
854 struct kvm_sregs sregs;
855 int i;
857 sregs.pvr = env->spr[SPR_PVR];
859 if (cpu->vhyp) {
860 PPCVirtualHypervisorClass *vhc =
861 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
862 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
863 } else {
864 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
867 /* Sync SLB */
868 #ifdef TARGET_PPC64
869 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
870 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
871 if (env->slb[i].esid & SLB_ESID_V) {
872 sregs.u.s.ppc64.slb[i].slbe |= i;
874 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
876 #endif
878 /* Sync SRs */
879 for (i = 0; i < 16; i++) {
880 sregs.u.s.ppc32.sr[i] = env->sr[i];
883 /* Sync BATs */
884 for (i = 0; i < 8; i++) {
885 /* Beware. We have to swap upper and lower bits here */
886 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
887 | env->DBAT[1][i];
888 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
889 | env->IBAT[1][i];
892 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
895 int kvm_arch_put_registers(CPUState *cs, int level)
897 PowerPCCPU *cpu = POWERPC_CPU(cs);
898 CPUPPCState *env = &cpu->env;
899 struct kvm_regs regs;
900 int ret;
901 int i;
903 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
904 if (ret < 0) {
905 return ret;
908 regs.ctr = env->ctr;
909 regs.lr = env->lr;
910 regs.xer = cpu_read_xer(env);
911 regs.msr = env->msr;
912 regs.pc = env->nip;
914 regs.srr0 = env->spr[SPR_SRR0];
915 regs.srr1 = env->spr[SPR_SRR1];
917 regs.sprg0 = env->spr[SPR_SPRG0];
918 regs.sprg1 = env->spr[SPR_SPRG1];
919 regs.sprg2 = env->spr[SPR_SPRG2];
920 regs.sprg3 = env->spr[SPR_SPRG3];
921 regs.sprg4 = env->spr[SPR_SPRG4];
922 regs.sprg5 = env->spr[SPR_SPRG5];
923 regs.sprg6 = env->spr[SPR_SPRG6];
924 regs.sprg7 = env->spr[SPR_SPRG7];
926 regs.pid = env->spr[SPR_BOOKE_PID];
928 for (i = 0; i < 32; i++)
929 regs.gpr[i] = env->gpr[i];
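/* Pack the eight 4-bit CR fields into the single 32-bit CR image KVM expects */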
931 regs.cr = 0;
932 for (i = 0; i < 8; i++) {
933 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
936 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
937 if (ret < 0)
938 return ret;
940 kvm_put_fp(cs);
942 if (env->tlb_dirty) {
943 kvm_sw_tlb_put(cpu);
944 env->tlb_dirty = false;
947 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
948 ret = kvmppc_put_books_sregs(cpu);
949 if (ret < 0) {
950 return ret;
954 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
955 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
958 if (cap_one_reg) {
959 int i;
961 /* We deliberately ignore errors here: for kernels which have
962 * the ONE_REG calls but don't support the specific
963 * registers, there's a reasonable chance things will still
964 * work, at least until we try to migrate. */
965 for (i = 0; i < 1024; i++) {
966 uint64_t id = env->spr_cb[i].one_reg_id;
968 if (id != 0) {
969 kvm_put_one_spr(cs, id, i);
973 #ifdef TARGET_PPC64
974 if (msr_ts) {
975 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
976 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
978 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
979 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
981 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
982 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
983 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
984 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
986 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
988 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
993 if (cap_papr) {
994 if (kvm_put_vpa(cs) < 0) {
995 DPRINTF("Warning: Unable to set VPA information to KVM\n");
999 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1000 #endif /* TARGET_PPC64 */
1003 return ret;
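/* Recompute a BookE exception vector from the guest's IVORn and IVPR SPRs */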
1006 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1008 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1011 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1013 CPUPPCState *env = &cpu->env;
1014 struct kvm_sregs sregs;
1015 int ret;
1017 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1018 if (ret < 0) {
1019 return ret;
1022 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1023 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1024 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1025 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1026 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1027 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1028 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1029 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1030 env->spr[SPR_DECR] = sregs.u.e.dec;
1031 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1032 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1033 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1036 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1037 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1038 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1039 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1040 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1041 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1044 if (sregs.u.e.features & KVM_SREGS_E_64) {
1045 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1048 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1049 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1052 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1053 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1054 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1055 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1056 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1057 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1058 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1059 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1060 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1061 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1062 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1063 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1064 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1065 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1066 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1067 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1068 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1069 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1070 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1071 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1072 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1073 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1074 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1075 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1076 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1077 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1078 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1079 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1080 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1081 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1082 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1083 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1084 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1086 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1087 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1088 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1089 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1090 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1091 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1092 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1095 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1096 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1097 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1100 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1101 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1102 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1103 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1104 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1108 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1109 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1110 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1111 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1112 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1113 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1114 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1115 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1116 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1117 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1118 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1121 if (sregs.u.e.features & KVM_SREGS_EXP) {
1122 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1125 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1126 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1127 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1130 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1131 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1132 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1133 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1135 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1136 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1137 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1141 return 0;
1144 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1146 CPUPPCState *env = &cpu->env;
1147 struct kvm_sregs sregs;
1148 int ret;
1149 int i;
1151 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1152 if (ret < 0) {
1153 return ret;
1156 if (!cpu->vhyp) {
1157 ppc_store_sdr1(env, sregs.u.s.sdr1);
1160 /* Sync SLB */
1161 #ifdef TARGET_PPC64
1163 * The packed SLB array we get from KVM_GET_SREGS only contains
1164 * information about valid entries. So we flush our internal copy
1165 * to get rid of stale ones, then put all valid SLB entries back
1166 * in.
1168 memset(env->slb, 0, sizeof(env->slb));
1169 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1170 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1171 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1173 * Only restore valid entries
1175 if (rb & SLB_ESID_V) {
1176 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1179 #endif
1181 /* Sync SRs */
1182 for (i = 0; i < 16; i++) {
1183 env->sr[i] = sregs.u.s.ppc32.sr[i];
1186 /* Sync BATs */
1187 for (i = 0; i < 8; i++) {
1188 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1189 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1190 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1191 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1194 return 0;
1197 int kvm_arch_get_registers(CPUState *cs)
1199 PowerPCCPU *cpu = POWERPC_CPU(cs);
1200 CPUPPCState *env = &cpu->env;
1201 struct kvm_regs regs;
1202 uint32_t cr;
1203 int i, ret;
1205 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1206 if (ret < 0)
1207 return ret;
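/* Unpack the 32-bit CR image from KVM into the eight 4-bit CR fields */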
1209 cr = regs.cr;
1210 for (i = 7; i >= 0; i--) {
1211 env->crf[i] = cr & 15;
1212 cr >>= 4;
1215 env->ctr = regs.ctr;
1216 env->lr = regs.lr;
1217 cpu_write_xer(env, regs.xer);
1218 env->msr = regs.msr;
1219 env->nip = regs.pc;
1221 env->spr[SPR_SRR0] = regs.srr0;
1222 env->spr[SPR_SRR1] = regs.srr1;
1224 env->spr[SPR_SPRG0] = regs.sprg0;
1225 env->spr[SPR_SPRG1] = regs.sprg1;
1226 env->spr[SPR_SPRG2] = regs.sprg2;
1227 env->spr[SPR_SPRG3] = regs.sprg3;
1228 env->spr[SPR_SPRG4] = regs.sprg4;
1229 env->spr[SPR_SPRG5] = regs.sprg5;
1230 env->spr[SPR_SPRG6] = regs.sprg6;
1231 env->spr[SPR_SPRG7] = regs.sprg7;
1233 env->spr[SPR_BOOKE_PID] = regs.pid;
1235 for (i = 0; i < 32; i++)
1236 env->gpr[i] = regs.gpr[i];
1238 kvm_get_fp(cs);
1240 if (cap_booke_sregs) {
1241 ret = kvmppc_get_booke_sregs(cpu);
1242 if (ret < 0) {
1243 return ret;
1247 if (cap_segstate) {
1248 ret = kvmppc_get_books_sregs(cpu);
1249 if (ret < 0) {
1250 return ret;
1254 if (cap_hior) {
1255 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1258 if (cap_one_reg) {
1259 int i;
1261 /* We deliberately ignore errors here: for kernels which have
1262 * the ONE_REG calls but don't support the specific
1263 * registers, there's a reasonable chance things will still
1264 * work, at least until we try to migrate. */
1265 for (i = 0; i < 1024; i++) {
1266 uint64_t id = env->spr_cb[i].one_reg_id;
1268 if (id != 0) {
1269 kvm_get_one_spr(cs, id, i);
1273 #ifdef TARGET_PPC64
1274 if (msr_ts) {
1275 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1276 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1278 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1279 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1281 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1282 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1283 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1284 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1286 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1287 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1288 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1289 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1293 if (cap_papr) {
1294 if (kvm_get_vpa(cs) < 0) {
1295 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1299 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1300 #endif
1303 return 0;
1306 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1308 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1310 if (irq != PPC_INTERRUPT_EXT) {
1311 return 0;
1314 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1315 return 0;
1318 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1320 return 0;
1323 #if defined(TARGET_PPC64)
1324 #define PPC_INPUT_INT PPC970_INPUT_INT
1325 #else
1326 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1327 #endif
1329 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1331 PowerPCCPU *cpu = POWERPC_CPU(cs);
1332 CPUPPCState *env = &cpu->env;
1333 int r;
1334 unsigned irq;
1336 qemu_mutex_lock_iothread();
1338 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1339 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1340 if (!cap_interrupt_level &&
1341 run->ready_for_interrupt_injection &&
1342 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1343 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1345 /* For now KVM disregards the 'irq' argument. However, in the
1346 * future KVM could cache it in-kernel to avoid a heavyweight exit
1347 * when reading the UIC.
1349 irq = KVM_INTERRUPT_SET;
1351 DPRINTF("injected interrupt %d\n", irq);
1352 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1353 if (r < 0) {
1354 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1357 /* Always wake up soon in case the interrupt was level based */
1358 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1359 (NANOSECONDS_PER_SECOND / 50));
1362 /* We don't know if there are more interrupts pending after this. However,
1363 * the guest will return to userspace in the course of handling this one
1364 * anyways, so we will get a chance to deliver the rest. */
1366 qemu_mutex_unlock_iothread();
1369 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1371 return MEMTXATTRS_UNSPECIFIED;
1374 int kvm_arch_process_async_events(CPUState *cs)
1376 return cs->halted;
1379 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1381 CPUState *cs = CPU(cpu);
1382 CPUPPCState *env = &cpu->env;
1384 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1385 cs->halted = 1;
1386 cs->exception_index = EXCP_HLT;
1389 return 0;
1392 /* map dcr access to existing qemu dcr emulation */
1393 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1395 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1396 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1398 return 0;
1401 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1403 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1404 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1406 return 0;
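/* Software breakpoint: save the original instruction at bp->pc, then patch in KVM's debug trap opcode */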
1409 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1411 /* Mixed endian case is not handled */
1412 uint32_t sc = debug_inst_opcode;
1414 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1415 sizeof(sc), 0) ||
1416 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1417 return -EINVAL;
1420 return 0;
1423 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1425 uint32_t sc;
1427 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1428 sc != debug_inst_opcode ||
1429 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1430 sizeof(sc), 1)) {
1431 return -EINVAL;
1434 return 0;
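/* Look up a hardware breakpoint/watchpoint slot by address and type; returns its index, or -1 if not found */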
1437 static int find_hw_breakpoint(target_ulong addr, int type)
1439 int n;
1441 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1442 <= ARRAY_SIZE(hw_debug_points));
1444 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1445 if (hw_debug_points[n].addr == addr &&
1446 hw_debug_points[n].type == type) {
1447 return n;
1451 return -1;
1454 static int find_hw_watchpoint(target_ulong addr, int *flag)
1456 int n;
1458 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1459 if (n >= 0) {
1460 *flag = BP_MEM_ACCESS;
1461 return n;
1464 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1465 if (n >= 0) {
1466 *flag = BP_MEM_WRITE;
1467 return n;
1470 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1471 if (n >= 0) {
1472 *flag = BP_MEM_READ;
1473 return n;
1476 return -1;
1479 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1480 target_ulong len, int type)
1482 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1483 return -ENOBUFS;
1486 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1487 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1489 switch (type) {
1490 case GDB_BREAKPOINT_HW:
1491 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1492 return -ENOBUFS;
1495 if (find_hw_breakpoint(addr, type) >= 0) {
1496 return -EEXIST;
1499 nb_hw_breakpoint++;
1500 break;
1502 case GDB_WATCHPOINT_WRITE:
1503 case GDB_WATCHPOINT_READ:
1504 case GDB_WATCHPOINT_ACCESS:
1505 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1506 return -ENOBUFS;
1509 if (find_hw_breakpoint(addr, type) >= 0) {
1510 return -EEXIST;
1513 nb_hw_watchpoint++;
1514 break;
1516 default:
1517 return -ENOSYS;
1520 return 0;
1523 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1524 target_ulong len, int type)
1526 int n;
1528 n = find_hw_breakpoint(addr, type);
1529 if (n < 0) {
1530 return -ENOENT;
1533 switch (type) {
1534 case GDB_BREAKPOINT_HW:
1535 nb_hw_breakpoint--;
1536 break;
1538 case GDB_WATCHPOINT_WRITE:
1539 case GDB_WATCHPOINT_READ:
1540 case GDB_WATCHPOINT_ACCESS:
1541 nb_hw_watchpoint--;
1542 break;
1544 default:
1545 return -ENOSYS;
1547 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1549 return 0;
1552 void kvm_arch_remove_all_hw_breakpoints(void)
1554 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1557 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1559 int n;
1561 /* Software Breakpoint updates */
1562 if (kvm_sw_breakpoints_active(cs)) {
1563 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1566 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1567 <= ARRAY_SIZE(hw_debug_points));
1568 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1570 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1571 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1572 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1573 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1574 switch (hw_debug_points[n].type) {
1575 case GDB_BREAKPOINT_HW:
1576 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1577 break;
1578 case GDB_WATCHPOINT_WRITE:
1579 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1580 break;
1581 case GDB_WATCHPOINT_READ:
1582 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1583 break;
1584 case GDB_WATCHPOINT_ACCESS:
1585 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1586 KVMPPC_DEBUG_WATCH_READ;
1587 break;
1588 default:
1589 cpu_abort(cs, "Unsupported breakpoint type\n");
1591 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1596 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1598 CPUState *cs = CPU(cpu);
1599 CPUPPCState *env = &cpu->env;
1600 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1601 int handle = 0;
1602 int n;
1603 int flag = 0;
1605 if (cs->singlestep_enabled) {
1606 handle = 1;
1607 } else if (arch_info->status) {
1608 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1609 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1610 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1611 if (n >= 0) {
1612 handle = 1;
1614 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1615 KVMPPC_DEBUG_WATCH_WRITE)) {
1616 n = find_hw_watchpoint(arch_info->address, &flag);
1617 if (n >= 0) {
1618 handle = 1;
1619 cs->watchpoint_hit = &hw_watchpoint;
1620 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1621 hw_watchpoint.flags = flag;
1625 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1626 handle = 1;
1627 } else {
1628 /* QEMU is not able to handle this debug exception, so inject
1629 * a program exception into the guest;
1630 * yes, a program exception, NOT a debug exception!
1631 * When QEMU is using the debug resources, the debug exception must
1632 * always be set. To achieve this we set MSR_DE and also set
1633 * MSRP_DEP so the guest cannot change MSR_DE.
1634 * When emulating debug resources for the guest, we want the guest
1635 * to control MSR_DE (enable/disable the debug interrupt as needed).
1636 * Supporting both configurations is NOT possible.
1637 * As a result, we cannot share debug resources
1638 * between QEMU and the guest on the BookE architecture.
1639 * In the current design QEMU gets priority over the guest:
1640 * this means that if QEMU is using the debug resources then the guest
1641 * cannot use them.
1642 * For software breakpoints QEMU uses a privileged instruction, so
1643 * there is no way we can be here because the guest raised a
1644 * debug exception; the only possibility is that the guest executed
1645 * a privileged / illegal instruction, and that is why we are
1646 * injecting a program interrupt.
1649 cpu_synchronize_state(cs);
1650 /* env->nip is PC, so increment this by 4 to use
1651 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1653 env->nip += 4;
1654 cs->exception_index = POWERPC_EXCP_PROGRAM;
1655 env->error_code = POWERPC_EXCP_INVAL;
1656 ppc_cpu_do_interrupt(cs);
1659 return handle;
1662 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1664 PowerPCCPU *cpu = POWERPC_CPU(cs);
1665 CPUPPCState *env = &cpu->env;
1666 int ret;
1668 qemu_mutex_lock_iothread();
1670 switch (run->exit_reason) {
1671 case KVM_EXIT_DCR:
1672 if (run->dcr.is_write) {
1673 DPRINTF("handle dcr write\n");
1674 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1675 } else {
1676 DPRINTF("handle dcr read\n");
1677 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1679 break;
1680 case KVM_EXIT_HLT:
1681 DPRINTF("handle halt\n");
1682 ret = kvmppc_handle_halt(cpu);
1683 break;
1684 #if defined(TARGET_PPC64)
1685 case KVM_EXIT_PAPR_HCALL:
1686 DPRINTF("handle PAPR hypercall\n");
1687 run->papr_hcall.ret = spapr_hypercall(cpu,
1688 run->papr_hcall.nr,
1689 run->papr_hcall.args);
1690 ret = 0;
1691 break;
1692 #endif
1693 case KVM_EXIT_EPR:
1694 DPRINTF("handle epr\n");
1695 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1696 ret = 0;
1697 break;
1698 case KVM_EXIT_WATCHDOG:
1699 DPRINTF("handle watchdog expiry\n");
1700 watchdog_perform_action();
1701 ret = 0;
1702 break;
1704 case KVM_EXIT_DEBUG:
1705 DPRINTF("handle debug exception\n");
1706 if (kvm_handle_debug(cpu, run)) {
1707 ret = EXCP_DEBUG;
1708 break;
1710 /* re-enter, this exception was guest-internal */
1711 ret = 0;
1712 break;
1714 default:
1715 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1716 ret = -1;
1717 break;
1720 qemu_mutex_unlock_iothread();
1721 return ret;
1724 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1726 CPUState *cs = CPU(cpu);
1727 uint32_t bits = tsr_bits;
1728 struct kvm_one_reg reg = {
1729 .id = KVM_REG_PPC_OR_TSR,
1730 .addr = (uintptr_t) &bits,
1733 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1736 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1739 CPUState *cs = CPU(cpu);
1740 uint32_t bits = tsr_bits;
1741 struct kvm_one_reg reg = {
1742 .id = KVM_REG_PPC_CLEAR_TSR,
1743 .addr = (uintptr_t) &bits,
1746 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1749 int kvmppc_set_tcr(PowerPCCPU *cpu)
1751 CPUState *cs = CPU(cpu);
1752 CPUPPCState *env = &cpu->env;
1753 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1755 struct kvm_one_reg reg = {
1756 .id = KVM_REG_PPC_TCR,
1757 .addr = (uintptr_t) &tcr,
1760 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1763 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1765 CPUState *cs = CPU(cpu);
1766 int ret;
1768 if (!kvm_enabled()) {
1769 return -1;
1772 if (!cap_ppc_watchdog) {
1773 printf("warning: KVM does not support watchdog");
1774 return -1;
1777 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1778 if (ret < 0) {
1779 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1780 __func__, strerror(-ret));
1781 return ret;
1784 return ret;
1787 static int read_cpuinfo(const char *field, char *value, int len)
1789 FILE *f;
1790 int ret = -1;
1791 int field_len = strlen(field);
1792 char line[512];
1794 f = fopen("/proc/cpuinfo", "r");
1795 if (!f) {
1796 return -1;
1799 do {
1800 if (!fgets(line, sizeof(line), f)) {
1801 break;
1803 if (!strncmp(line, field, field_len)) {
1804 pstrcpy(value, len, line);
1805 ret = 0;
1806 break;
1808 } while(*line);
1810 fclose(f);
1812 return ret;
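/* Parse the host timebase frequency from the "timebase" line in /proc/cpuinfo, falling back to 1 GHz if it cannot be determined */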
1815 uint32_t kvmppc_get_tbfreq(void)
1817 char line[512];
1818 char *ns;
1819 uint32_t retval = NANOSECONDS_PER_SECOND;
1821 if (read_cpuinfo("timebase", line, sizeof(line))) {
1822 return retval;
1825 if (!(ns = strchr(line, ':'))) {
1826 return retval;
1829 ns++;
1831 return atoi(ns);
1834 bool kvmppc_get_host_serial(char **value)
1836 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1837 NULL);
1840 bool kvmppc_get_host_model(char **value)
1842 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1845 /* Try to find a device tree node for a CPU with clock-frequency property */
1846 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1848 struct dirent *dirp;
1849 DIR *dp;
1851 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1852 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1853 return -1;
1856 buf[0] = '\0';
1857 while ((dirp = readdir(dp)) != NULL) {
1858 FILE *f;
1859 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1860 dirp->d_name);
1861 f = fopen(buf, "r");
1862 if (f) {
1863 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1864 fclose(f);
1865 break;
1867 buf[0] = '\0';
1869 closedir(dp);
1870 if (buf[0] == '\0') {
1871 printf("Unknown host!\n");
1872 return -1;
1875 return 0;
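/* Read a device tree property file containing a single big-endian 32-bit or 64-bit integer */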
1878 static uint64_t kvmppc_read_int_dt(const char *filename)
1880 union {
1881 uint32_t v32;
1882 uint64_t v64;
1883 } u;
1884 FILE *f;
1885 int len;
1887 f = fopen(filename, "rb");
1888 if (!f) {
1889 return -1;
1892 len = fread(&u, 1, sizeof(u), f);
1893 fclose(f);
1894 switch (len) {
1895 case 4:
1896 /* property is a 32-bit quantity */
1897 return be32_to_cpu(u.v32);
1898 case 8:
1899 return be64_to_cpu(u.v64);
1902 return 0;
1905 /* Read a CPU node property from the host device tree that's a single
1906 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1907 * (can't find or open the property, or doesn't understand the
1908 * format) */
1909 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1911 char buf[PATH_MAX], *tmp;
1912 uint64_t val;
1914 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1915 return -1;
1918 tmp = g_strdup_printf("%s/%s", buf, propname);
1919 val = kvmppc_read_int_dt(tmp);
1920 g_free(tmp);
1922 return val;
1925 uint64_t kvmppc_get_clockfreq(void)
1927 return kvmppc_read_int_cpu_dt("clock-frequency");
1930 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1932 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1933 CPUState *cs = CPU(cpu);
1935 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1936 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1937 return 0;
1940 return 1;
1943 int kvmppc_get_hasidle(CPUPPCState *env)
1945 struct kvm_ppc_pvinfo pvinfo;
1947 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1948 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1949 return 1;
1952 return 0;
1955 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1957 uint32_t *hc = (uint32_t*)buf;
1958 struct kvm_ppc_pvinfo pvinfo;
1960 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1961 memcpy(buf, pvinfo.hcall, buf_len);
1962 return 0;
1966 * Fallback to always fail hypercalls regardless of endianness:
1968 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1969 * li r3, -1
1970 * b .+8 (becomes nop in wrong endian)
1971 * bswap32(li r3, -1)
1974 hc[0] = cpu_to_be32(0x08000048);
1975 hc[1] = cpu_to_be32(0x3860ffff);
1976 hc[2] = cpu_to_be32(0x48000008);
1977 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1979 return 1;
1982 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1984 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1987 void kvmppc_enable_logical_ci_hcalls(void)
1990 * FIXME: it would be nice if we could detect the cases where
1991 * we're using a device which requires the in kernel
1992 * implementation of these hcalls, but the kernel lacks them and
1993 * produce a warning.
1995 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1996 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1999 void kvmppc_enable_set_mode_hcall(void)
2001 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2004 void kvmppc_enable_clear_ref_mod_hcalls(void)
2006 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2007 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2010 void kvmppc_set_papr(PowerPCCPU *cpu)
2012 CPUState *cs = CPU(cpu);
2013 int ret;
2015 if (!kvm_enabled()) {
2016 return;
2019 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2020 if (ret) {
2021 error_report("This vCPU type or KVM version does not support PAPR");
2022 exit(1);
2025 /* Update the capability flag so we sync the right information
2026 * with kvm */
2027 cap_papr = 1;
2030 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2032 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2035 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2037 CPUState *cs = CPU(cpu);
2038 int ret;
2040 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2041 if (ret && mpic_proxy) {
2042 error_report("This KVM version does not support EPR");
2043 exit(1);
2047 int kvmppc_smt_threads(void)
2049 return cap_ppc_smt ? cap_ppc_smt : 1;
2052 int kvmppc_set_smt_threads(int smt)
2054 int ret;
2056 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2057 if (!ret) {
2058 cap_ppc_smt = smt;
2060 return ret;
2063 void kvmppc_hint_smt_possible(Error **errp)
2065 int i;
2066 GString *g;
2067 char *s;
2069 assert(kvm_enabled());
2070 if (cap_ppc_smt_possible) {
2071 g = g_string_new("Available VSMT modes:");
2072 for (i = 63; i >= 0; i--) {
2073 if ((1UL << i) & cap_ppc_smt_possible) {
2074 g_string_append_printf(g, " %lu", (1UL << i));
2077 s = g_string_free(g, false);
2078 error_append_hint(errp, "%s.\n", s);
2079 g_free(s);
2080 } else {
2081 error_append_hint(errp,
2082 "This KVM seems to be too old to support VSMT.\n");
2087 #ifdef TARGET_PPC64
2088 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2090 struct kvm_ppc_smmu_info info;
2091 long rampagesize, best_page_shift;
2092 int i;
2094 /* Find the largest hardware supported page size that's less than
2095 * or equal to the (logical) backing page size of guest RAM */
2096 kvm_get_smmu_info(&info, &error_fatal);
2097 rampagesize = qemu_getrampagesize();
2098 best_page_shift = 0;
2100 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2101 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2103 if (!sps->page_shift) {
2104 continue;
2107 if ((sps->page_shift > best_page_shift)
2108 && ((1UL << sps->page_shift) <= rampagesize)) {
2109 best_page_shift = sps->page_shift;
2113 return MIN(current_size,
2114 1ULL << (best_page_shift + hash_shift - 7));
2116 #endif
2118 bool kvmppc_spapr_use_multitce(void)
2120 return cap_spapr_multitce;
2123 int kvmppc_spapr_enable_inkernel_multitce(void)
2125 int ret;
2127 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2128 H_PUT_TCE_INDIRECT, 1);
2129 if (!ret) {
2130 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2131 H_STUFF_TCE, 1);
2134 return ret;
2137 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2138 uint64_t bus_offset, uint32_t nb_table,
2139 int *pfd, bool need_vfio)
2141 long len;
2142 int fd;
2143 void *table;
2145 /* Must set fd to -1 so we don't try to munmap when called for
2146 * destroying the table, which the upper layers -will- do
2148 *pfd = -1;
2149 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2150 return NULL;
2153 if (cap_spapr_tce_64) {
2154 struct kvm_create_spapr_tce_64 args = {
2155 .liobn = liobn,
2156 .page_shift = page_shift,
2157 .offset = bus_offset >> page_shift,
2158 .size = nb_table,
2159 .flags = 0
2161 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2162 if (fd < 0) {
2163 fprintf(stderr,
2164 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2165 liobn);
2166 return NULL;
2168 } else if (cap_spapr_tce) {
2169 uint64_t window_size = (uint64_t) nb_table << page_shift;
2170 struct kvm_create_spapr_tce args = {
2171 .liobn = liobn,
2172 .window_size = window_size,
2174 if ((window_size != args.window_size) || bus_offset) {
2175 return NULL;
2177 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2178 if (fd < 0) {
2179 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2180 liobn);
2181 return NULL;
2183 } else {
2184 return NULL;
2187 len = nb_table * sizeof(uint64_t);
2188 /* FIXME: round this up to page size */
2190 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2191 if (table == MAP_FAILED) {
2192 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2193 liobn);
2194 close(fd);
2195 return NULL;
2198 *pfd = fd;
2199 return table;
2202 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2204 long len;
2206 if (fd < 0) {
2207 return -1;
2210 len = nb_table * sizeof(uint64_t);
2211 if ((munmap(table, len) < 0) ||
2212 (close(fd) < 0)) {
2213 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2214 strerror(errno));
2215 /* Leak the table */
2218 return 0;
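/*
 * Allocate or reset the guest hash page table via KVM.  Returns the log2
 * of the size the kernel actually allocated, 0 if QEMU should allocate
 * the HPT itself (full emulation or PR KVM), or a negative errno.
 */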
2221 int kvmppc_reset_htab(int shift_hint)
2223 uint32_t shift = shift_hint;
2225 if (!kvm_enabled()) {
2226 /* Full emulation; tell the caller to allocate the htab itself */
2227 return 0;
2229 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2230 int ret;
2231 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2232 if (ret == -ENOTTY) {
2233 /* At least some versions of PR KVM advertise the
2234 * capability, but don't implement the ioctl(). Oops.
2235 * Return 0 so that we allocate the htab in QEMU, as is
2236 * correct for PR. */
2237 return 0;
2238 } else if (ret < 0) {
2239 return ret;
2241 return shift;
2244 /* We have a kernel that predates the htab reset calls. For PR
2245 * KVM, we need to allocate the htab ourselves; an HV KVM of
2246 * this era will already have allocated a fixed 16MB hash table. */
2247 if (kvmppc_is_pr(kvm_state)) {
2248 /* PR - tell caller to allocate htab */
2249 return 0;
2250 } else {
2251 /* HV - assume 16MB kernel allocated htab */
2252 return 24;
2256 static inline uint32_t mfpvr(void)
2258 uint32_t pvr;
2260 asm ("mfpvr %0"
2261 : "=r"(pvr));
2262 return pvr;
2265 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2267 if (on) {
2268 *word |= flags;
2269 } else {
2270 *word &= ~flags;
2274 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2276 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2277 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2278 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2280 /* Now fix up the class with information we can query from the host */
2281 pcc->pvr = mfpvr();
2283 alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2284 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2285 alter_insns(&pcc->insns_flags2, PPC2_VSX,
2286 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2287 alter_insns(&pcc->insns_flags2, PPC2_DFP,
2288 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2290 if (dcache_size != -1) {
2291 pcc->l1_dcache_size = dcache_size;
2294 if (icache_size != -1) {
2295 pcc->l1_icache_size = icache_size;
2298 #if defined(TARGET_PPC64)
2299 pcc->radix_page_info = kvm_get_radix_page_info();
2301 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2303 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2304 * compliant. More importantly, advertising ISA 3.00
2305 * architected mode may prevent guests from activating
2306 * necessary DD1 workarounds.
2308 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2309 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2311 #endif /* defined(TARGET_PPC64) */
2314 bool kvmppc_has_cap_epr(void)
2316 return cap_epr;
2319 bool kvmppc_has_cap_fixup_hcalls(void)
2321 return cap_fixup_hcalls;
2324 bool kvmppc_has_cap_htm(void)
2326 return cap_htm;
2329 bool kvmppc_has_cap_mmu_radix(void)
2331 return cap_mmu_radix;
2334 bool kvmppc_has_cap_mmu_hash_v3(void)
2336 return cap_mmu_hash_v3;
2339 static bool kvmppc_power8_host(void)
2341 bool ret = false;
2342 #ifdef TARGET_PPC64
2344 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2345 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2346 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2347 (base_pvr == CPU_POWERPC_POWER8_BASE);
2349 #endif /* TARGET_PPC64 */
2350 return ret;
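/*
 * Parse the KVM_PPC_GET_CPU_CHAR results into the tri-state values used
 * for the spapr "safe" capabilities: 0 means assume vulnerable, higher
 * values mean a workaround or a hardware fix is available.
 */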
2353 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2355 bool l1d_thread_priv_req = !kvmppc_power8_host();
2357 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2358 return 2;
2359 } else if ((!l1d_thread_priv_req ||
2360 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2361 (c.character & c.character_mask
2362 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2363 return 1;
2366 return 0;
2369 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2371 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2372 return 2;
2373 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2374 return 1;
2377 return 0;
2380 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2382 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2383 return SPAPR_CAP_FIXED_CCD;
2384 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2385 return SPAPR_CAP_FIXED_IBS;
2388 return 0;
2391 static void kvmppc_get_cpu_characteristics(KVMState *s)
2393 struct kvm_ppc_cpu_char c;
2394 int ret;
2396 /* Assume broken */
2397 cap_ppc_safe_cache = 0;
2398 cap_ppc_safe_bounds_check = 0;
2399 cap_ppc_safe_indirect_branch = 0;
2401 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2402 if (!ret) {
2403 return;
2405 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2406 if (ret < 0) {
2407 return;
2410 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2411 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2412 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2415 int kvmppc_get_cap_safe_cache(void)
2417 return cap_ppc_safe_cache;
2420 int kvmppc_get_cap_safe_bounds_check(void)
2422 return cap_ppc_safe_bounds_check;
2425 int kvmppc_get_cap_safe_indirect_branch(void)
2427 return cap_ppc_safe_indirect_branch;
2430 bool kvmppc_has_cap_nested_kvm_hv(void)
2432 return !!cap_ppc_nested_kvm_hv;
2435 int kvmppc_set_cap_nested_kvm_hv(int enable)
2437 return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2440 bool kvmppc_has_cap_spapr_vfio(void)
2442 return cap_spapr_vfio;
2445 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2447 uint32_t host_pvr = mfpvr();
2448 PowerPCCPUClass *pvr_pcc;
2450 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2451 if (pvr_pcc == NULL) {
2452 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2455 return pvr_pcc;
2458 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2460 TypeInfo type_info = {
2461 .name = TYPE_HOST_POWERPC_CPU,
2462 .class_init = kvmppc_host_cpu_class_init,
2464 MachineClass *mc = MACHINE_GET_CLASS(ms);
2465 PowerPCCPUClass *pvr_pcc;
2466 ObjectClass *oc;
2467 DeviceClass *dc;
2468 int i;
2470 pvr_pcc = kvm_ppc_get_host_cpu_class();
2471 if (pvr_pcc == NULL) {
2472 return -1;
2474 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2475 type_register(&type_info);
2476 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2477 /* override TCG default cpu type with 'host' cpu model */
2478 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2481 oc = object_class_by_name(type_info.name);
2482 g_assert(oc);
2485 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2486 * we want "POWER8" to be a "family" alias that points to the current
2487 * host CPU type, too)
2489 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2490 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2491 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2492 char *suffix;
2494 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2495 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2496 if (suffix) {
2497 *suffix = 0;
2499 break;
2503 return 0;
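/*
 * Associate an RTAS token with a named RTAS service so that KVM can
 * handle that call in the kernel rather than exiting to QEMU.
 */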
2506 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2508 struct kvm_rtas_token_args args = {
2509 .token = token,
2512 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2513 return -ENOENT;
2516 strncpy(args.name, function, sizeof(args.name));
2518 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
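/*
 * Obtain a file descriptor for streaming the guest hash page table
 * (KVM_PPC_GET_HTAB_FD), either for reading it out or writing it back;
 * used below by the HPT save/load and read/write helpers.
 */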
2521 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2523 struct kvm_get_htab_fd s = {
2524 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2525 .start_index = index,
2527 int ret;
2529 if (!cap_htab_fd) {
2530 error_setg(errp, "KVM version doesn't support %s the HPT",
2531 write ? "writing" : "reading");
2532 return -ENOTSUP;
2535 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2536 if (ret < 0) {
2537 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2538 write ? "writing" : "reading", write ? "to" : "from",
2539 strerror(errno));
2540 return -errno;
2543 return ret;
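/*
 * The HTAB fd yields a sequence of kvm_get_htab_header records, each
 * followed by n_valid HPTEs of HASH_PTE_SIZE_64 bytes; n_invalid counts
 * entries that are absent and carry no data.
 */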
2546 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2548 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2549 uint8_t buf[bufsize];
2550 ssize_t rc;
2552 do {
2553 rc = read(fd, buf, bufsize);
2554 if (rc < 0) {
2555 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2556 strerror(errno));
2557 return rc;
2558 } else if (rc) {
2559 uint8_t *buffer = buf;
2560 ssize_t n = rc;
2561 while (n) {
2562 struct kvm_get_htab_header *head =
2563 (struct kvm_get_htab_header *) buffer;
2564 size_t chunksize = sizeof(*head) +
2565 HASH_PTE_SIZE_64 * head->n_valid;
2567 qemu_put_be32(f, head->index);
2568 qemu_put_be16(f, head->n_valid);
2569 qemu_put_be16(f, head->n_invalid);
2570 qemu_put_buffer(f, (void *)(head + 1),
2571 HASH_PTE_SIZE_64 * head->n_valid);
2573 buffer += chunksize;
2574 n -= chunksize;
2577 } while ((rc != 0)
2578 && ((max_ns < 0)
2579 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2581 return (rc == 0) ? 1 : 0;
2584 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2585 uint16_t n_valid, uint16_t n_invalid)
2587 struct kvm_get_htab_header *buf;
2588 size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2589 ssize_t rc;
2591 buf = alloca(chunksize);
2592 buf->index = index;
2593 buf->n_valid = n_valid;
2594 buf->n_invalid = n_invalid;
2596 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2598 rc = write(fd, buf, chunksize);
2599 if (rc < 0) {
2600 fprintf(stderr, "Error writing KVM hash table: %s\n",
2601 strerror(errno));
2602 return rc;
2604 if (rc != chunksize) {
2605 /* We should never get a short write on a single chunk */
2606 fprintf(stderr, "Short write, restoring KVM hash table\n");
2607 return -1;
2609 return 0;
2612 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2614 return true;
2617 void kvm_arch_init_irq_routing(KVMState *s)
2621 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2623 int fd, rc;
2624 int i;
2626 fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
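/* Walk the header/HPTE stream, zero-filling the ranges KVM reports as
 * invalid so the caller receives exactly n contiguous HPTEs. */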
2628 i = 0;
2629 while (i < n) {
2630 struct kvm_get_htab_header *hdr;
2631 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2632 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2634 rc = read(fd, buf, sizeof(buf));
2635 if (rc < 0) {
2636 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2639 hdr = (struct kvm_get_htab_header *)buf;
2640 while ((i < n) && ((char *)hdr < (buf + rc))) {
2641 int invalid = hdr->n_invalid, valid = hdr->n_valid;
2643 if (hdr->index != (ptex + i)) {
2644 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2645 " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
2648 if (n - i < valid) {
2649 valid = n - i;
2651 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2652 i += valid;
2654 if ((n - i) < invalid) {
2655 invalid = n - i;
2657 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2658 i += invalid;
2660 hdr = (struct kvm_get_htab_header *)
2661 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2665 close(fd);
2668 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2670 int fd, rc;
2671 struct {
2672 struct kvm_get_htab_header hdr;
2673 uint64_t pte0;
2674 uint64_t pte1;
2675 } buf;
2677 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2679 buf.hdr.n_valid = 1;
2680 buf.hdr.n_invalid = 0;
2681 buf.hdr.index = ptex;
2682 buf.pte0 = cpu_to_be64(pte0);
2683 buf.pte1 = cpu_to_be64(pte1);
2685 rc = write(fd, &buf, sizeof(buf));
2686 if (rc != sizeof(buf)) {
2687 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2689 close(fd);
2692 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2693 uint64_t address, uint32_t data, PCIDevice *dev)
2695 return 0;
2698 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2699 int vector, PCIDevice *dev)
2701 return 0;
2704 int kvm_arch_release_virq_post(int virq)
2706 return 0;
2709 int kvm_arch_msi_data_to_gsi(uint32_t data)
2711 return data & 0xffff;
2714 int kvmppc_enable_hwrng(void)
2716 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2717 return -1;
2720 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2723 void kvmppc_check_papr_resize_hpt(Error **errp)
2725 if (!kvm_enabled()) {
2726 return; /* No KVM, we're good */
2729 if (cap_resize_hpt) {
2730 return; /* Kernel has explicit support, we're good */
2733 /* Otherwise fallback on looking for PR KVM */
2734 if (kvmppc_is_pr(kvm_state)) {
2735 return;
2738 error_setg(errp,
2739 "Hash page table resizing not available with this KVM version");
2742 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2744 CPUState *cs = CPU(cpu);
2745 struct kvm_ppc_resize_hpt rhpt = {
2746 .flags = flags,
2747 .shift = shift,
2750 if (!cap_resize_hpt) {
2751 return -ENOSYS;
2754 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2757 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2759 CPUState *cs = CPU(cpu);
2760 struct kvm_ppc_resize_hpt rhpt = {
2761 .flags = flags,
2762 .shift = shift,
2765 if (!cap_resize_hpt) {
2766 return -ENOSYS;
2769 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2773 * This is a helper function to detect a post-migration scenario
2774 * in which a guest, running under KVM-HV, freezes in cpu_post_load because
2775 * the guest kernel can't handle a PVR value other than the actual host
2776 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2778 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2779 * (so, we're HV), return true. The workaround itself is done in
2780 * cpu_post_load.
2782 * The order here is important: we'll only check for KVM PR as a
2783 * fallback if the guest kernel can't handle the situation itself.
2784 * We want to avoid querying the running KVM type at the QEMU level
2785 * as much as possible.
2787 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2789 CPUState *cs = CPU(cpu);
2791 if (!kvm_enabled()) {
2792 return false;
2795 if (cap_ppc_pvr_compat) {
2796 return false;
2799 return !kvmppc_is_pr(cs->kvm_state);
2802 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2804 CPUState *cs = CPU(cpu);
2806 if (kvm_enabled()) {
2807 kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);