target/ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
53 //#define DEBUG_KVM
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60 do { } while (0)
61 #endif
63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66 KVM_CAP_LAST_INFO
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
95 static uint32_t debug_inst_opcode;
97 /* XXX We have a race condition where we actually have a level triggered
98 * interrupt, but the infrastructure can't expose that yet, so the guest
99 * takes but ignores it, goes to sleep and never gets notified that there's
100 * still an interrupt pending.
102 * As a quick workaround, let's just wake up again 20 ms after we injected
103 * an interrupt. That way we can assure that we're always reinjecting
104 * interrupts in case the guest swallowed them.
106 static QEMUTimer *idle_timer;
108 static void kvm_kick_cpu(void *opaque)
110 PowerPCCPU *cpu = opaque;
112 qemu_cpu_kick(CPU(cpu));
115 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
116 * should only be used for fallback tests - generally we should use
117 * explicit capabilities for the features we want, rather than
118 * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
121 /* Assume KVM-PR if the GET_PVINFO capability is available */
122 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 int kvm_arch_init(MachineState *ms, KVMState *s)
130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143 /* Note: we don't set cap_papr here, because this capability is
144 * only activated after this by kvmppc_set_papr() */
145 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152 kvmppc_get_cpu_characteristics(s);
154 * Note: setting it to false because there is no such capability
155 * in KVM at this moment.
157 * TODO: call kvm_vm_check_extension() with the right capability
158 * after the kernel starts implementing it.*/
159 cap_ppc_pvr_compat = false;
161 if (!cap_interrupt_level) {
162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163 "VM to stall at times!\n");
166 kvm_ppc_register_host_cpu_type(ms);
168 return 0;
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
173 return 0;
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
178 CPUPPCState *cenv = &cpu->env;
179 CPUState *cs = CPU(cpu);
180 struct kvm_sregs sregs;
181 int ret;
183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184 /* What we're really trying to say is "if we're on BookE, we use
185 the native PVR for now". This is the only sane way to check
186 it though, so we may mislead users into thinking they can run
187 BookE guests on BookS. Let's hope nobody dares enough :) */
188 return 0;
189 } else {
190 if (!cap_segstate) {
191 fprintf(stderr, "kvm error: missing PVR setting capability\n");
192 return -ENOSYS;
196 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197 if (ret) {
198 return ret;
201 sregs.pvr = cenv->spr[SPR_PVR];
202 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
210 struct kvm_book3e_206_tlb_params params = {};
211 struct kvm_config_tlb cfg = {};
212 unsigned int entries = 0;
213 int ret, i;
215 if (!kvm_enabled() ||
216 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217 return 0;
220 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
222 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223 params.tlb_sizes[i] = booke206_tlb_size(env, i);
224 params.tlb_ways[i] = booke206_tlb_ways(env, i);
225 entries += params.tlb_sizes[i];
228 assert(entries == env->nb_tlb);
229 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
231 env->tlb_dirty = true;
233 cfg.array = (uintptr_t)env->tlb.tlbm;
234 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235 cfg.params = (uintptr_t)&params;
236 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
238 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239 if (ret < 0) {
240 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241 __func__, strerror(-ret));
242 return ret;
245 env->kvm_sw_tlb = true;
246 return 0;
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252 struct kvm_ppc_smmu_info *info)
254 CPUPPCState *env = &cpu->env;
255 CPUState *cs = CPU(cpu);
257 memset(info, 0, sizeof(*info));
259 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
260 * we need to "guess" what the supported page sizes are.
262 * For that to work we make a few assumptions:
264 * - Check whether we are running "PR" KVM which only supports 4K
265 * and 16M pages, but supports them regardless of the backing
266 * store characteristics. We also don't support 1T segments.
268 * This is safe as if HV KVM ever supports that capability or PR
269 * KVM grows support for more page/segment sizes, those versions
270 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271 * will not hit this fallback
273 * - Else we are running HV KVM. This means we only support page
274 * sizes that fit in the backing store. Additionally we only
275 * advertise 64K pages if the processor is ARCH 2.06 and we assume
276 * P7 encodings for the SLB and hash table. Here too, we assume
277 * support for any newer processor will mean a kernel that
278 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
279 * this fallback.
281 if (kvmppc_is_pr(cs->kvm_state)) {
282 /* No flags */
283 info->flags = 0;
284 info->slb_size = 64;
286 /* Standard 4k base page size segment */
287 info->sps[0].page_shift = 12;
288 info->sps[0].slb_enc = 0;
289 info->sps[0].enc[0].page_shift = 12;
290 info->sps[0].enc[0].pte_enc = 0;
292 /* Standard 16M large page size segment */
293 info->sps[1].page_shift = 24;
294 info->sps[1].slb_enc = SLB_VSID_L;
295 info->sps[1].enc[0].page_shift = 24;
296 info->sps[1].enc[0].pte_enc = 0;
297 } else {
298 int i = 0;
300 /* HV KVM has backing store size restrictions */
301 info->flags = KVM_PPC_PAGE_SIZES_REAL;
303 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
304 info->flags |= KVM_PPC_1T_SEGMENTS;
307 if (env->mmu_model == POWERPC_MMU_2_06 ||
308 env->mmu_model == POWERPC_MMU_2_07) {
309 info->slb_size = 32;
310 } else {
311 info->slb_size = 64;
314 /* Standard 4k base page size segment */
315 info->sps[i].page_shift = 12;
316 info->sps[i].slb_enc = 0;
317 info->sps[i].enc[0].page_shift = 12;
318 info->sps[i].enc[0].pte_enc = 0;
319 i++;
321 /* 64K on MMU 2.06 and later */
322 if (env->mmu_model == POWERPC_MMU_2_06 ||
323 env->mmu_model == POWERPC_MMU_2_07) {
324 info->sps[i].page_shift = 16;
325 info->sps[i].slb_enc = 0x110;
326 info->sps[i].enc[0].page_shift = 16;
327 info->sps[i].enc[0].pte_enc = 1;
328 i++;
331 /* Standard 16M large page size segment */
332 info->sps[i].page_shift = 24;
333 info->sps[i].slb_enc = SLB_VSID_L;
334 info->sps[i].enc[0].page_shift = 24;
335 info->sps[i].enc[0].pte_enc = 0;
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
341 CPUState *cs = CPU(cpu);
342 int ret;
344 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346 if (ret == 0) {
347 return;
351 kvm_get_fallback_smmu_info(cpu, info);
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
356 KVMState *s = KVM_STATE(current_machine->accelerator);
357 struct ppc_radix_page_info *radix_page_info;
358 struct kvm_ppc_rmmu_info rmmu_info;
359 int i;
361 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362 return NULL;
364 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365 return NULL;
367 radix_page_info = g_malloc0(sizeof(*radix_page_info));
368 radix_page_info->count = 0;
369 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370 if (rmmu_info.ap_encodings[i]) {
371 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372 radix_page_info->count++;
375 return radix_page_info;
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379 bool radix, bool gtse,
380 uint64_t proc_tbl)
382 CPUState *cs = CPU(cpu);
383 int ret;
384 uint64_t flags = 0;
385 struct kvm_ppc_mmuv3_cfg cfg = {
386 .process_table = proc_tbl,
389 if (radix) {
390 flags |= KVM_PPC_MMUV3_RADIX;
392 if (gtse) {
393 flags |= KVM_PPC_MMUV3_GTSE;
395 cfg.flags = flags;
396 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397 switch (ret) {
398 case 0:
399 return H_SUCCESS;
400 case -EINVAL:
401 return H_PARAMETER;
402 case -ENODEV:
403 return H_NOT_AVAILABLE;
404 default:
405 return H_HARDWARE;
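/* Helper for the page-size fixup below: when KVM reports
 * KVM_PPC_PAGE_SIZES_REAL (HV KVM), a segment/page size is only usable
 * if it fits within the backing page size of guest RAM; otherwise any
 * size is acceptable. */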
409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
411 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
412 return true;
415 return (1ul << shift) <= rampgsize;
418 static long max_cpu_page_size;
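/* Reconcile the CPU's advertised hash page-size encodings with what the
 * host kernel reports via KVM_PPC_GET_SMMU_INFO (or the fallback above),
 * dropping any encoding that does not fit the host RAM page size. */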
420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
422 static struct kvm_ppc_smmu_info smmu_info;
423 static bool has_smmu_info;
424 CPUPPCState *env = &cpu->env;
425 int iq, ik, jq, jk;
427 /* We only handle page sizes for 64-bit server guests for now */
428 if (!(env->mmu_model & POWERPC_MMU_64)) {
429 return;
432 /* Collect MMU info from kernel if not already */
433 if (!has_smmu_info) {
434 kvm_get_smmu_info(cpu, &smmu_info);
435 has_smmu_info = true;
438 if (!max_cpu_page_size) {
439 max_cpu_page_size = qemu_getrampagesize();
442 /* Convert to QEMU form */
443 memset(cpu->hash64_opts->sps, 0, sizeof(*cpu->hash64_opts->sps));
445 /* If we have HV KVM, we need to forbid CI large pages if our
446 * host page size is smaller than 64K.
448 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
449 if (getpagesize() >= 0x10000) {
450 cpu->hash64_opts->flags |= PPC_HASH64_CI_LARGEPAGE;
451 } else {
452 cpu->hash64_opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
457 * XXX This loop should be an entry wide AND of the capabilities that
458 * the selected CPU has with the capabilities that KVM supports.
460 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
461 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
462 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
464 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
465 ksps->page_shift)) {
466 continue;
468 qsps->page_shift = ksps->page_shift;
469 qsps->slb_enc = ksps->slb_enc;
470 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
471 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
472 ksps->enc[jk].page_shift)) {
473 continue;
475 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
476 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
477 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
478 break;
481 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
482 break;
485 cpu->hash64_opts->slb_size = smmu_info.slb_size;
486 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
487 cpu->hash64_opts->flags &= ~PPC_HASH64_1TSEG;
491 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
493 Object *mem_obj = object_resolve_path(obj_path, NULL);
494 long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj));
496 return pagesize >= max_cpu_page_size;
499 #else /* defined (TARGET_PPC64) */
501 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
505 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
507 return true;
510 #endif /* !defined (TARGET_PPC64) */
512 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
514 return POWERPC_CPU(cpu)->vcpu_id;
517 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
518 * book3s supports only 1 watchpoint, so array size
519 * of 4 is sufficient for now.
521 #define MAX_HW_BKPTS 4
523 static struct HWBreakpoint {
524 target_ulong addr;
525 int type;
526 } hw_debug_points[MAX_HW_BKPTS];
528 static CPUWatchpoint hw_watchpoint;
530 /* By default, no breakpoints or watchpoints are supported */
531 static int max_hw_breakpoint;
532 static int max_hw_watchpoint;
533 static int nb_hw_breakpoint;
534 static int nb_hw_watchpoint;
536 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
538 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
539 max_hw_breakpoint = 2;
540 max_hw_watchpoint = 2;
543 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
544 fprintf(stderr, "Error initializing h/w breakpoints\n");
545 return;
549 int kvm_arch_init_vcpu(CPUState *cs)
551 PowerPCCPU *cpu = POWERPC_CPU(cs);
552 CPUPPCState *cenv = &cpu->env;
553 int ret;
555 /* Gather server mmu info from KVM and update the CPU state */
556 kvm_fixup_page_sizes(cpu);
558 /* Synchronize sregs with kvm */
559 ret = kvm_arch_sync_sregs(cpu);
560 if (ret) {
561 if (ret == -EINVAL) {
562 error_report("Register sync failed... If you're using kvm-hv.ko,"
563 " only \"-cpu host\" is possible");
565 return ret;
568 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
570 switch (cenv->mmu_model) {
571 case POWERPC_MMU_BOOKE206:
572 /* This target supports access to KVM's guest TLB */
573 ret = kvm_booke206_tlb_init(cpu);
574 break;
575 case POWERPC_MMU_2_07:
576 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
577 /* KVM-HV has transactional memory on POWER8 also without the
578 * KVM_CAP_PPC_HTM extension, so enable it here instead as
579 * long as it's available to userspace on the host. */
580 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
581 cap_htm = true;
584 break;
585 default:
586 break;
589 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
590 kvmppc_hw_debug_points_init(cenv);
592 return ret;
595 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
597 CPUPPCState *env = &cpu->env;
598 CPUState *cs = CPU(cpu);
599 struct kvm_dirty_tlb dirty_tlb;
600 unsigned char *bitmap;
601 int ret;
603 if (!env->kvm_sw_tlb) {
604 return;
607 bitmap = g_malloc((env->nb_tlb + 7) / 8);
608 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
610 dirty_tlb.bitmap = (uintptr_t)bitmap;
611 dirty_tlb.num_dirty = env->nb_tlb;
613 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
614 if (ret) {
615 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
616 __func__, strerror(-ret));
619 g_free(bitmap);
622 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
624 PowerPCCPU *cpu = POWERPC_CPU(cs);
625 CPUPPCState *env = &cpu->env;
626 union {
627 uint32_t u32;
628 uint64_t u64;
629 } val;
630 struct kvm_one_reg reg = {
631 .id = id,
632 .addr = (uintptr_t) &val,
634 int ret;
636 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
637 if (ret != 0) {
638 trace_kvm_failed_spr_get(spr, strerror(errno));
639 } else {
640 switch (id & KVM_REG_SIZE_MASK) {
641 case KVM_REG_SIZE_U32:
642 env->spr[spr] = val.u32;
643 break;
645 case KVM_REG_SIZE_U64:
646 env->spr[spr] = val.u64;
647 break;
649 default:
650 /* Don't handle this size yet */
651 abort();
656 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
658 PowerPCCPU *cpu = POWERPC_CPU(cs);
659 CPUPPCState *env = &cpu->env;
660 union {
661 uint32_t u32;
662 uint64_t u64;
663 } val;
664 struct kvm_one_reg reg = {
665 .id = id,
666 .addr = (uintptr_t) &val,
668 int ret;
670 switch (id & KVM_REG_SIZE_MASK) {
671 case KVM_REG_SIZE_U32:
672 val.u32 = env->spr[spr];
673 break;
675 case KVM_REG_SIZE_U64:
676 val.u64 = env->spr[spr];
677 break;
679 default:
680 /* Don't handle this size yet */
681 abort();
684 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
685 if (ret != 0) {
686 trace_kvm_failed_spr_set(spr, strerror(errno));
690 static int kvm_put_fp(CPUState *cs)
692 PowerPCCPU *cpu = POWERPC_CPU(cs);
693 CPUPPCState *env = &cpu->env;
694 struct kvm_one_reg reg;
695 int i;
696 int ret;
698 if (env->insns_flags & PPC_FLOAT) {
699 uint64_t fpscr = env->fpscr;
700 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
702 reg.id = KVM_REG_PPC_FPSCR;
703 reg.addr = (uintptr_t)&fpscr;
704 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
705 if (ret < 0) {
706 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
707 return ret;
710 for (i = 0; i < 32; i++) {
711 uint64_t vsr[2];
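/* The 128-bit VSR is passed to KVM as two 64-bit doublewords in host
 * byte order; the FP value occupies the more significant half, hence
 * the endian-dependent ordering below. */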
713 #ifdef HOST_WORDS_BIGENDIAN
714 vsr[0] = float64_val(env->fpr[i]);
715 vsr[1] = env->vsr[i];
716 #else
717 vsr[0] = env->vsr[i];
718 vsr[1] = float64_val(env->fpr[i]);
719 #endif
720 reg.addr = (uintptr_t) &vsr;
721 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
723 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
724 if (ret < 0) {
725 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
726 i, strerror(errno));
727 return ret;
732 if (env->insns_flags & PPC_ALTIVEC) {
733 reg.id = KVM_REG_PPC_VSCR;
734 reg.addr = (uintptr_t)&env->vscr;
735 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
736 if (ret < 0) {
737 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
738 return ret;
741 for (i = 0; i < 32; i++) {
742 reg.id = KVM_REG_PPC_VR(i);
743 reg.addr = (uintptr_t)&env->avr[i];
744 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745 if (ret < 0) {
746 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
747 return ret;
752 return 0;
755 static int kvm_get_fp(CPUState *cs)
757 PowerPCCPU *cpu = POWERPC_CPU(cs);
758 CPUPPCState *env = &cpu->env;
759 struct kvm_one_reg reg;
760 int i;
761 int ret;
763 if (env->insns_flags & PPC_FLOAT) {
764 uint64_t fpscr;
765 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
767 reg.id = KVM_REG_PPC_FPSCR;
768 reg.addr = (uintptr_t)&fpscr;
769 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770 if (ret < 0) {
771 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
772 return ret;
773 } else {
774 env->fpscr = fpscr;
777 for (i = 0; i < 32; i++) {
778 uint64_t vsr[2];
780 reg.addr = (uintptr_t) &vsr;
781 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
783 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784 if (ret < 0) {
785 DPRINTF("Unable to get %s%d from KVM: %s\n",
786 vsx ? "VSR" : "FPR", i, strerror(errno));
787 return ret;
788 } else {
789 #ifdef HOST_WORDS_BIGENDIAN
790 env->fpr[i] = vsr[0];
791 if (vsx) {
792 env->vsr[i] = vsr[1];
794 #else
795 env->fpr[i] = vsr[1];
796 if (vsx) {
797 env->vsr[i] = vsr[0];
799 #endif
804 if (env->insns_flags & PPC_ALTIVEC) {
805 reg.id = KVM_REG_PPC_VSCR;
806 reg.addr = (uintptr_t)&env->vscr;
807 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
808 if (ret < 0) {
809 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
810 return ret;
813 for (i = 0; i < 32; i++) {
814 reg.id = KVM_REG_PPC_VR(i);
815 reg.addr = (uintptr_t)&env->avr[i];
816 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
817 if (ret < 0) {
818 DPRINTF("Unable to get VR%d from KVM: %s\n",
819 i, strerror(errno));
820 return ret;
825 return 0;
828 #if defined(TARGET_PPC64)
829 static int kvm_get_vpa(CPUState *cs)
831 PowerPCCPU *cpu = POWERPC_CPU(cs);
832 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
833 struct kvm_one_reg reg;
834 int ret;
836 reg.id = KVM_REG_PPC_VPA_ADDR;
837 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
838 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
839 if (ret < 0) {
840 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
841 return ret;
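/* KVM transfers the SLB shadow and DTL each as a single (addr, size)
 * pair, so the size field must sit immediately after the address field
 * in sPAPRCPUState; the asserts below verify that layout. */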
844 assert((uintptr_t)&spapr_cpu->slb_shadow_size
845 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
846 reg.id = KVM_REG_PPC_VPA_SLB;
847 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
848 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
849 if (ret < 0) {
850 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
851 strerror(errno));
852 return ret;
855 assert((uintptr_t)&spapr_cpu->dtl_size
856 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
857 reg.id = KVM_REG_PPC_VPA_DTL;
858 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
859 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860 if (ret < 0) {
861 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
862 strerror(errno));
863 return ret;
866 return 0;
869 static int kvm_put_vpa(CPUState *cs)
871 PowerPCCPU *cpu = POWERPC_CPU(cs);
872 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
873 struct kvm_one_reg reg;
874 int ret;
876 /* SLB shadow or DTL can't be registered unless a master VPA is
877 * registered. That means when restoring state, if a VPA *is*
878 * registered, we need to set that up first. If not, we need to
879 * deregister the others before deregistering the master VPA */
880 assert(spapr_cpu->vpa_addr
881 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
883 if (spapr_cpu->vpa_addr) {
884 reg.id = KVM_REG_PPC_VPA_ADDR;
885 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
886 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
887 if (ret < 0) {
888 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
889 return ret;
893 assert((uintptr_t)&spapr_cpu->slb_shadow_size
894 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
895 reg.id = KVM_REG_PPC_VPA_SLB;
896 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
897 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
898 if (ret < 0) {
899 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
900 return ret;
903 assert((uintptr_t)&spapr_cpu->dtl_size
904 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
905 reg.id = KVM_REG_PPC_VPA_DTL;
906 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
907 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
908 if (ret < 0) {
909 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
910 strerror(errno));
911 return ret;
914 if (!spapr_cpu->vpa_addr) {
915 reg.id = KVM_REG_PPC_VPA_ADDR;
916 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
917 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
918 if (ret < 0) {
919 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
920 return ret;
924 return 0;
926 #endif /* TARGET_PPC64 */
928 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
930 CPUPPCState *env = &cpu->env;
931 struct kvm_sregs sregs;
932 int i;
934 sregs.pvr = env->spr[SPR_PVR];
936 if (cpu->vhyp) {
937 PPCVirtualHypervisorClass *vhc =
938 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
939 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
940 } else {
941 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
944 /* Sync SLB */
945 #ifdef TARGET_PPC64
946 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
947 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
948 if (env->slb[i].esid & SLB_ESID_V) {
949 sregs.u.s.ppc64.slb[i].slbe |= i;
951 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
953 #endif
955 /* Sync SRs */
956 for (i = 0; i < 16; i++) {
957 sregs.u.s.ppc32.sr[i] = env->sr[i];
960 /* Sync BATs */
961 for (i = 0; i < 8; i++) {
962 /* Beware. We have to swap upper and lower bits here */
963 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
964 | env->DBAT[1][i];
965 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
966 | env->IBAT[1][i];
969 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
972 int kvm_arch_put_registers(CPUState *cs, int level)
974 PowerPCCPU *cpu = POWERPC_CPU(cs);
975 CPUPPCState *env = &cpu->env;
976 struct kvm_regs regs;
977 int ret;
978 int i;
980 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
981 if (ret < 0) {
982 return ret;
985 regs.ctr = env->ctr;
986 regs.lr = env->lr;
987 regs.xer = cpu_read_xer(env);
988 regs.msr = env->msr;
989 regs.pc = env->nip;
991 regs.srr0 = env->spr[SPR_SRR0];
992 regs.srr1 = env->spr[SPR_SRR1];
994 regs.sprg0 = env->spr[SPR_SPRG0];
995 regs.sprg1 = env->spr[SPR_SPRG1];
996 regs.sprg2 = env->spr[SPR_SPRG2];
997 regs.sprg3 = env->spr[SPR_SPRG3];
998 regs.sprg4 = env->spr[SPR_SPRG4];
999 regs.sprg5 = env->spr[SPR_SPRG5];
1000 regs.sprg6 = env->spr[SPR_SPRG6];
1001 regs.sprg7 = env->spr[SPR_SPRG7];
1003 regs.pid = env->spr[SPR_BOOKE_PID];
1005 for (i = 0;i < 32; i++)
1006 regs.gpr[i] = env->gpr[i];
1008 regs.cr = 0;
1009 for (i = 0; i < 8; i++) {
1010 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1013 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1014 if (ret < 0)
1015 return ret;
1017 kvm_put_fp(cs);
1019 if (env->tlb_dirty) {
1020 kvm_sw_tlb_put(cpu);
1021 env->tlb_dirty = false;
1024 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1025 ret = kvmppc_put_books_sregs(cpu);
1026 if (ret < 0) {
1027 return ret;
1031 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1032 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1035 if (cap_one_reg) {
1036 int i;
1038 /* We deliberately ignore errors here, for kernels which have
1039 * the ONE_REG calls, but don't support the specific
1040 * registers, there's a reasonable chance things will still
1041 * work, at least until we try to migrate. */
1042 for (i = 0; i < 1024; i++) {
1043 uint64_t id = env->spr_cb[i].one_reg_id;
1045 if (id != 0) {
1046 kvm_put_one_spr(cs, id, i);
1050 #ifdef TARGET_PPC64
1051 if (msr_ts) {
1052 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1055 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1070 if (cap_papr) {
1071 if (kvm_put_vpa(cs) < 0) {
1072 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1076 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1077 #endif /* TARGET_PPC64 */
1080 return ret;
1083 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1085 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1088 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1090 CPUPPCState *env = &cpu->env;
1091 struct kvm_sregs sregs;
1092 int ret;
1094 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1095 if (ret < 0) {
1096 return ret;
1099 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1100 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1101 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1102 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1103 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1104 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1105 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1106 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1107 env->spr[SPR_DECR] = sregs.u.e.dec;
1108 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1109 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1110 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1113 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1114 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1115 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1116 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1117 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1118 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1121 if (sregs.u.e.features & KVM_SREGS_E_64) {
1122 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1125 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1126 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1129 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1130 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1131 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1132 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1133 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1134 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1135 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1136 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1137 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1138 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1139 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1140 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1141 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1142 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1143 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1144 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1145 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1146 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1147 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1148 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1149 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1150 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1151 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1152 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1153 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1154 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1155 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1156 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1157 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1158 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1159 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1160 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1161 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1163 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1164 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1165 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1166 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1167 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1168 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1169 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1172 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1173 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1174 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1177 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1178 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1179 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1180 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1181 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1185 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1186 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1187 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1188 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1189 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1190 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1191 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1192 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1193 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1194 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1195 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1198 if (sregs.u.e.features & KVM_SREGS_EXP) {
1199 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1202 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1203 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1204 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1207 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1208 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1209 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1210 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1212 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1213 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1214 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1218 return 0;
1221 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1223 CPUPPCState *env = &cpu->env;
1224 struct kvm_sregs sregs;
1225 int ret;
1226 int i;
1228 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1229 if (ret < 0) {
1230 return ret;
1233 if (!cpu->vhyp) {
1234 ppc_store_sdr1(env, sregs.u.s.sdr1);
1237 /* Sync SLB */
1238 #ifdef TARGET_PPC64
1240 * The packed SLB array we get from KVM_GET_SREGS only contains
1241 * information about valid entries. So we flush our internal copy
1242 * to get rid of stale ones, then put all valid SLB entries back
1243 * in.
1245 memset(env->slb, 0, sizeof(env->slb));
1246 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1247 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1248 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1250 * Only restore valid entries
1252 if (rb & SLB_ESID_V) {
1253 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1256 #endif
1258 /* Sync SRs */
1259 for (i = 0; i < 16; i++) {
1260 env->sr[i] = sregs.u.s.ppc32.sr[i];
1263 /* Sync BATs */
1264 for (i = 0; i < 8; i++) {
1265 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1266 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1267 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1268 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1271 return 0;
1274 int kvm_arch_get_registers(CPUState *cs)
1276 PowerPCCPU *cpu = POWERPC_CPU(cs);
1277 CPUPPCState *env = &cpu->env;
1278 struct kvm_regs regs;
1279 uint32_t cr;
1280 int i, ret;
1282 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1283 if (ret < 0)
1284 return ret;
1286 cr = regs.cr;
1287 for (i = 7; i >= 0; i--) {
1288 env->crf[i] = cr & 15;
1289 cr >>= 4;
1292 env->ctr = regs.ctr;
1293 env->lr = regs.lr;
1294 cpu_write_xer(env, regs.xer);
1295 env->msr = regs.msr;
1296 env->nip = regs.pc;
1298 env->spr[SPR_SRR0] = regs.srr0;
1299 env->spr[SPR_SRR1] = regs.srr1;
1301 env->spr[SPR_SPRG0] = regs.sprg0;
1302 env->spr[SPR_SPRG1] = regs.sprg1;
1303 env->spr[SPR_SPRG2] = regs.sprg2;
1304 env->spr[SPR_SPRG3] = regs.sprg3;
1305 env->spr[SPR_SPRG4] = regs.sprg4;
1306 env->spr[SPR_SPRG5] = regs.sprg5;
1307 env->spr[SPR_SPRG6] = regs.sprg6;
1308 env->spr[SPR_SPRG7] = regs.sprg7;
1310 env->spr[SPR_BOOKE_PID] = regs.pid;
1312 for (i = 0;i < 32; i++)
1313 env->gpr[i] = regs.gpr[i];
1315 kvm_get_fp(cs);
1317 if (cap_booke_sregs) {
1318 ret = kvmppc_get_booke_sregs(cpu);
1319 if (ret < 0) {
1320 return ret;
1324 if (cap_segstate) {
1325 ret = kvmppc_get_books_sregs(cpu);
1326 if (ret < 0) {
1327 return ret;
1331 if (cap_hior) {
1332 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1335 if (cap_one_reg) {
1336 int i;
1338 /* We deliberately ignore errors here, for kernels which have
1339 * the ONE_REG calls, but don't support the specific
1340 * registers, there's a reasonable chance things will still
1341 * work, at least until we try to migrate. */
1342 for (i = 0; i < 1024; i++) {
1343 uint64_t id = env->spr_cb[i].one_reg_id;
1345 if (id != 0) {
1346 kvm_get_one_spr(cs, id, i);
1350 #ifdef TARGET_PPC64
1351 if (msr_ts) {
1352 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1355 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1370 if (cap_papr) {
1371 if (kvm_get_vpa(cs) < 0) {
1372 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1376 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1377 #endif
1380 return 0;
1383 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1385 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1387 if (irq != PPC_INTERRUPT_EXT) {
1388 return 0;
1391 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1392 return 0;
1395 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1397 return 0;
1400 #if defined(TARGET_PPCEMB)
1401 #define PPC_INPUT_INT PPC40x_INPUT_INT
1402 #elif defined(TARGET_PPC64)
1403 #define PPC_INPUT_INT PPC970_INPUT_INT
1404 #else
1405 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1406 #endif
1408 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1410 PowerPCCPU *cpu = POWERPC_CPU(cs);
1411 CPUPPCState *env = &cpu->env;
1412 int r;
1413 unsigned irq;
1415 qemu_mutex_lock_iothread();
1417 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1418 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1419 if (!cap_interrupt_level &&
1420 run->ready_for_interrupt_injection &&
1421 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1422 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1424 /* For now KVM disregards the 'irq' argument. However, in the
1425 * future KVM could cache it in-kernel to avoid a heavyweight exit
1426 * when reading the UIC.
1428 irq = KVM_INTERRUPT_SET;
1430 DPRINTF("injected interrupt %d\n", irq);
1431 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1432 if (r < 0) {
1433 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1436 /* Always wake up soon in case the interrupt was level based */
1437 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1438 (NANOSECONDS_PER_SECOND / 50));
1441 /* We don't know if there are more interrupts pending after this. However,
1442 * the guest will return to userspace in the course of handling this one
1443 * anyways, so we will get a chance to deliver the rest. */
1445 qemu_mutex_unlock_iothread();
1448 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1450 return MEMTXATTRS_UNSPECIFIED;
1453 int kvm_arch_process_async_events(CPUState *cs)
1455 return cs->halted;
1458 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1460 CPUState *cs = CPU(cpu);
1461 CPUPPCState *env = &cpu->env;
1463 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1464 cs->halted = 1;
1465 cs->exception_index = EXCP_HLT;
1468 return 0;
1471 /* map dcr access to existing qemu dcr emulation */
1472 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1474 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1475 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1477 return 0;
1480 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1482 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1483 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1485 return 0;
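/* Software breakpoints: save the original instruction in bp->saved_insn
 * and patch in the trap opcode that KVM reported via
 * KVM_REG_PPC_DEBUG_INST. */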
1488 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1490 /* Mixed endian case is not handled */
1491 uint32_t sc = debug_inst_opcode;
1493 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1494 sizeof(sc), 0) ||
1495 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1496 return -EINVAL;
1499 return 0;
1502 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1504 uint32_t sc;
1506 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1507 sc != debug_inst_opcode ||
1508 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1509 sizeof(sc), 1)) {
1510 return -EINVAL;
1513 return 0;
1516 static int find_hw_breakpoint(target_ulong addr, int type)
1518 int n;
1520 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1521 <= ARRAY_SIZE(hw_debug_points));
1523 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1524 if (hw_debug_points[n].addr == addr &&
1525 hw_debug_points[n].type == type) {
1526 return n;
1530 return -1;
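/* Map a watchpoint address back to its slot in hw_debug_points[],
 * checking the ACCESS, WRITE and READ types in turn and reporting the
 * matching BP_MEM_* flag through *flag. */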
1533 static int find_hw_watchpoint(target_ulong addr, int *flag)
1535 int n;
1537 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1538 if (n >= 0) {
1539 *flag = BP_MEM_ACCESS;
1540 return n;
1543 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1544 if (n >= 0) {
1545 *flag = BP_MEM_WRITE;
1546 return n;
1549 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1550 if (n >= 0) {
1551 *flag = BP_MEM_READ;
1552 return n;
1555 return -1;
1558 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1559 target_ulong len, int type)
1561 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1562 return -ENOBUFS;
1565 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1566 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1568 switch (type) {
1569 case GDB_BREAKPOINT_HW:
1570 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1571 return -ENOBUFS;
1574 if (find_hw_breakpoint(addr, type) >= 0) {
1575 return -EEXIST;
1578 nb_hw_breakpoint++;
1579 break;
1581 case GDB_WATCHPOINT_WRITE:
1582 case GDB_WATCHPOINT_READ:
1583 case GDB_WATCHPOINT_ACCESS:
1584 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1585 return -ENOBUFS;
1588 if (find_hw_breakpoint(addr, type) >= 0) {
1589 return -EEXIST;
1592 nb_hw_watchpoint++;
1593 break;
1595 default:
1596 return -ENOSYS;
1599 return 0;
1602 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1603 target_ulong len, int type)
1605 int n;
1607 n = find_hw_breakpoint(addr, type);
1608 if (n < 0) {
1609 return -ENOENT;
1612 switch (type) {
1613 case GDB_BREAKPOINT_HW:
1614 nb_hw_breakpoint--;
1615 break;
1617 case GDB_WATCHPOINT_WRITE:
1618 case GDB_WATCHPOINT_READ:
1619 case GDB_WATCHPOINT_ACCESS:
1620 nb_hw_watchpoint--;
1621 break;
1623 default:
1624 return -ENOSYS;
1626 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1628 return 0;
1631 void kvm_arch_remove_all_hw_breakpoints(void)
1633 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1636 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1638 int n;
1640 /* Software Breakpoint updates */
1641 if (kvm_sw_breakpoints_active(cs)) {
1642 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1645 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1646 <= ARRAY_SIZE(hw_debug_points));
1647 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1649 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1650 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1651 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1652 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1653 switch (hw_debug_points[n].type) {
1654 case GDB_BREAKPOINT_HW:
1655 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1656 break;
1657 case GDB_WATCHPOINT_WRITE:
1658 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1659 break;
1660 case GDB_WATCHPOINT_READ:
1661 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1662 break;
1663 case GDB_WATCHPOINT_ACCESS:
1664 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1665 KVMPPC_DEBUG_WATCH_READ;
1666 break;
1667 default:
1668 cpu_abort(cs, "Unsupported breakpoint type\n");
1670 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1675 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1677 CPUState *cs = CPU(cpu);
1678 CPUPPCState *env = &cpu->env;
1679 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1680 int handle = 0;
1681 int n;
1682 int flag = 0;
1684 if (cs->singlestep_enabled) {
1685 handle = 1;
1686 } else if (arch_info->status) {
1687 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1688 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1689 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1690 if (n >= 0) {
1691 handle = 1;
1693 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1694 KVMPPC_DEBUG_WATCH_WRITE)) {
1695 n = find_hw_watchpoint(arch_info->address, &flag);
1696 if (n >= 0) {
1697 handle = 1;
1698 cs->watchpoint_hit = &hw_watchpoint;
1699 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1700 hw_watchpoint.flags = flag;
1704 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1705 handle = 1;
1706 } else {
1707 /* QEMU is not able to handle the debug exception, so inject a
1708 * program exception into the guest.
1709 * Yes, a program exception, NOT a debug exception!
1710 * When QEMU is using debug resources then the debug exception must
1711 * always be set. To achieve this we set MSR_DE and also set
1712 * MSRP_DEP so the guest cannot change MSR_DE.
1713 * When emulating debug resources for the guest we want the guest
1714 * to control MSR_DE (enable/disable the debug interrupt as needed).
1715 * Supporting both configurations is NOT possible.
1716 * So the result is that we cannot share debug resources
1717 * between QEMU and Guest on BOOKE architecture.
1718 * In the current design QEMU gets the priority over guest,
1719 * this means that if QEMU is using debug resources then guest
1720 * cannot use them;
1721 * For software breakpoint QEMU uses a privileged instruction;
1722 * So there cannot be any reason that we are here for guest
1723 * set debug exception, only possibility is guest executed a
1724 * privileged / illegal instruction and that's why we are
1725 * injecting a program interrupt.
1728 cpu_synchronize_state(cs);
1729 /* env->nip is PC, so increment this by 4 to use
1730 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1732 env->nip += 4;
1733 cs->exception_index = POWERPC_EXCP_PROGRAM;
1734 env->error_code = POWERPC_EXCP_INVAL;
1735 ppc_cpu_do_interrupt(cs);
1738 return handle;
1741 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1743 PowerPCCPU *cpu = POWERPC_CPU(cs);
1744 CPUPPCState *env = &cpu->env;
1745 int ret;
1747 qemu_mutex_lock_iothread();
1749 switch (run->exit_reason) {
1750 case KVM_EXIT_DCR:
1751 if (run->dcr.is_write) {
1752 DPRINTF("handle dcr write\n");
1753 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1754 } else {
1755 DPRINTF("handle dcr read\n");
1756 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1758 break;
1759 case KVM_EXIT_HLT:
1760 DPRINTF("handle halt\n");
1761 ret = kvmppc_handle_halt(cpu);
1762 break;
1763 #if defined(TARGET_PPC64)
1764 case KVM_EXIT_PAPR_HCALL:
1765 DPRINTF("handle PAPR hypercall\n");
1766 run->papr_hcall.ret = spapr_hypercall(cpu,
1767 run->papr_hcall.nr,
1768 run->papr_hcall.args);
1769 ret = 0;
1770 break;
1771 #endif
1772 case KVM_EXIT_EPR:
1773 DPRINTF("handle epr\n");
1774 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1775 ret = 0;
1776 break;
1777 case KVM_EXIT_WATCHDOG:
1778 DPRINTF("handle watchdog expiry\n");
1779 watchdog_perform_action();
1780 ret = 0;
1781 break;
1783 case KVM_EXIT_DEBUG:
1784 DPRINTF("handle debug exception\n");
1785 if (kvm_handle_debug(cpu, run)) {
1786 ret = EXCP_DEBUG;
1787 break;
1789 /* re-enter, this exception was guest-internal */
1790 ret = 0;
1791 break;
1793 default:
1794 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1795 ret = -1;
1796 break;
1799 qemu_mutex_unlock_iothread();
1800 return ret;
1803 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1805 CPUState *cs = CPU(cpu);
1806 uint32_t bits = tsr_bits;
1807 struct kvm_one_reg reg = {
1808 .id = KVM_REG_PPC_OR_TSR,
1809 .addr = (uintptr_t) &bits,
1812 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1815 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1818 CPUState *cs = CPU(cpu);
1819 uint32_t bits = tsr_bits;
1820 struct kvm_one_reg reg = {
1821 .id = KVM_REG_PPC_CLEAR_TSR,
1822 .addr = (uintptr_t) &bits,
1825 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1828 int kvmppc_set_tcr(PowerPCCPU *cpu)
1830 CPUState *cs = CPU(cpu);
1831 CPUPPCState *env = &cpu->env;
1832 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1834 struct kvm_one_reg reg = {
1835 .id = KVM_REG_PPC_TCR,
1836 .addr = (uintptr_t) &tcr,
1839 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1842 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1844 CPUState *cs = CPU(cpu);
1845 int ret;
1847 if (!kvm_enabled()) {
1848 return -1;
1851 if (!cap_ppc_watchdog) {
1852 printf("warning: KVM does not support watchdog\n");
1853 return -1;
1856 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1857 if (ret < 0) {
1858 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1859 __func__, strerror(-ret));
1860 return ret;
1863 return ret;
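/* Scan /proc/cpuinfo for a line starting with 'field' and copy the whole
 * matching line into 'value' (at most 'len' bytes); returns 0 on success,
 * -1 if the field is missing or the file cannot be opened. */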
1866 static int read_cpuinfo(const char *field, char *value, int len)
1868 FILE *f;
1869 int ret = -1;
1870 int field_len = strlen(field);
1871 char line[512];
1873 f = fopen("/proc/cpuinfo", "r");
1874 if (!f) {
1875 return -1;
1878 do {
1879 if (!fgets(line, sizeof(line), f)) {
1880 break;
1882 if (!strncmp(line, field, field_len)) {
1883 pstrcpy(value, len, line);
1884 ret = 0;
1885 break;
1887 } while(*line);
1889 fclose(f);
1891 return ret;
1894 uint32_t kvmppc_get_tbfreq(void)
1896 char line[512];
1897 char *ns;
1898 uint32_t retval = NANOSECONDS_PER_SECOND;
1900 if (read_cpuinfo("timebase", line, sizeof(line))) {
1901 return retval;
1904 if (!(ns = strchr(line, ':'))) {
1905 return retval;
1908 ns++;
1910 return atoi(ns);
1913 bool kvmppc_get_host_serial(char **value)
1915 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1916 NULL);
1919 bool kvmppc_get_host_model(char **value)
1921 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1924 /* Try to find a device tree node for a CPU with clock-frequency property */
1925 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1927 struct dirent *dirp;
1928 DIR *dp;
1930 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1931 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1932 return -1;
1935 buf[0] = '\0';
1936 while ((dirp = readdir(dp)) != NULL) {
1937 FILE *f;
1938 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1939 dirp->d_name);
1940 f = fopen(buf, "r");
1941 if (f) {
1942 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1943 fclose(f);
1944 break;
1946 buf[0] = '\0';
1948 closedir(dp);
1949 if (buf[0] == '\0') {
1950 printf("Unknown host!\n");
1951 return -1;
1954 return 0;
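/* Read a device-tree property file and interpret it as one big-endian
 * integer: a 4-byte file yields a 32-bit value, an 8-byte file a 64-bit
 * value, anything else 0. */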
1957 static uint64_t kvmppc_read_int_dt(const char *filename)
1959 union {
1960 uint32_t v32;
1961 uint64_t v64;
1962 } u;
1963 FILE *f;
1964 int len;
1966 f = fopen(filename, "rb");
1967 if (!f) {
1968 return -1;
1971 len = fread(&u, 1, sizeof(u), f);
1972 fclose(f);
1973 switch (len) {
1974 case 4:
1975 /* property is a 32-bit quantity */
1976 return be32_to_cpu(u.v32);
1977 case 8:
1978 return be64_to_cpu(u.v64);
1981 return 0;
1984 /* Read a CPU node property from the host device tree that's a single
1985 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1986 * (can't find or open the property, or doesn't understand the
1987 * format) */
1988 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1990 char buf[PATH_MAX], *tmp;
1991 uint64_t val;
1993 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1994 return -1;
1997 tmp = g_strdup_printf("%s/%s", buf, propname);
1998 val = kvmppc_read_int_dt(tmp);
1999 g_free(tmp);
2001 return val;
2004 uint64_t kvmppc_get_clockfreq(void)
2006 return kvmppc_read_int_cpu_dt("clock-frequency");
2009 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2011 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2012 CPUState *cs = CPU(cpu);
2014 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2015 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2016 return 0;
2019 return 1;
2022 int kvmppc_get_hasidle(CPUPPCState *env)
2024 struct kvm_ppc_pvinfo pvinfo;
2026 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2027 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2028 return 1;
2031 return 0;
2034 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2036 uint32_t *hc = (uint32_t*)buf;
2037 struct kvm_ppc_pvinfo pvinfo;
2039 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2040 memcpy(buf, pvinfo.hcall, buf_len);
2041 return 0;
2045 * Fallback to always fail hypercalls regardless of endianness:
2047 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2048 * li r3, -1
2049 * b .+8 (becomes nop in wrong endian)
2050 * bswap32(li r3, -1)
2053 hc[0] = cpu_to_be32(0x08000048);
2054 hc[1] = cpu_to_be32(0x3860ffff);
2055 hc[2] = cpu_to_be32(0x48000008);
2056 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2058 return 1;
2061 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2063 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2066 void kvmppc_enable_logical_ci_hcalls(void)
2069 * FIXME: it would be nice if we could detect the cases where
2070 * we're using a device which requires the in kernel
2071 * implementation of these hcalls, but the kernel lacks them and
2072 * produce a warning.
2074 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2075 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2078 void kvmppc_enable_set_mode_hcall(void)
2080 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2083 void kvmppc_enable_clear_ref_mod_hcalls(void)
2085 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2086 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2089 void kvmppc_set_papr(PowerPCCPU *cpu)
2091 CPUState *cs = CPU(cpu);
2092 int ret;
2094 if (!kvm_enabled()) {
2095 return;
2098 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2099 if (ret) {
2100 error_report("This vCPU type or KVM version does not support PAPR");
2101 exit(1);
2104 /* Update the capability flag so we sync the right information
2105 * with kvm */
2106 cap_papr = 1;
2109 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2111 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2114 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2116 CPUState *cs = CPU(cpu);
2117 int ret;
2119 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2120 if (ret && mpic_proxy) {
2121 error_report("This KVM version does not support EPR");
2122 exit(1);
2126 int kvmppc_smt_threads(void)
2128 return cap_ppc_smt ? cap_ppc_smt : 1;
2131 int kvmppc_set_smt_threads(int smt)
2133 int ret;
2135 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2136 if (!ret) {
2137 cap_ppc_smt = smt;
2139 return ret;
2142 void kvmppc_hint_smt_possible(Error **errp)
2144 int i;
2145 GString *g;
2146 char *s;
2148 assert(kvm_enabled());
2149 if (cap_ppc_smt_possible) {
2150 g = g_string_new("Available VSMT modes:");
2151 for (i = 63; i >= 0; i--) {
2152 if ((1UL << i) & cap_ppc_smt_possible) {
2153 g_string_append_printf(g, " %lu", (1UL << i));
2156 s = g_string_free(g, false);
2157 error_append_hint(errp, "%s.\n", s);
2158 g_free(s);
2159 } else {
2160 error_append_hint(errp,
2161 "This KVM seems to be too old to support VSMT.\n");
2166 #ifdef TARGET_PPC64
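/*
 * Note on the formula below: 2^hash_shift bytes of hash table contain
 * 2^(hash_shift - 7) PTEGs (each group is 128 bytes), so the cap of
 * 1 << (best_page_shift + hash_shift - 7) presumably limits the RMA to
 * one backing page per PTEG.  For example, a 16MB HPT (hash_shift = 24)
 * with 64K backing pages (page shift 16) would cap the RMA at 8GB.
 * This reading of the formula is an editorial interpretation, not taken
 * from the original comments.
 */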
2167 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2169 struct kvm_ppc_smmu_info info;
2170 long rampagesize, best_page_shift;
2171 int i;
2173 /* Find the largest hardware supported page size that's less than
2174 * or equal to the (logical) backing page size of guest RAM */
2175 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2176 rampagesize = qemu_getrampagesize();
2177 best_page_shift = 0;
2179 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2180 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2182 if (!sps->page_shift) {
2183 continue;
2186 if ((sps->page_shift > best_page_shift)
2187 && ((1UL << sps->page_shift) <= rampagesize)) {
2188 best_page_shift = sps->page_shift;
2192 return MIN(current_size,
2193 1ULL << (best_page_shift + hash_shift - 7));
2195 #endif
2197 bool kvmppc_spapr_use_multitce(void)
2199 return cap_spapr_multitce;
2202 int kvmppc_spapr_enable_inkernel_multitce(void)
2204 int ret;
2206 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2207 H_PUT_TCE_INDIRECT, 1);
2208 if (!ret) {
2209 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2210 H_STUFF_TCE, 1);
2213 return ret;
2216 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2217 uint64_t bus_offset, uint32_t nb_table,
2218 int *pfd, bool need_vfio)
2220 long len;
2221 int fd;
2222 void *table;
2224 /* Must set fd to -1 so we don't try to munmap when called for
2225 * destroying the table, which the upper layers -will- do
2227 *pfd = -1;
2228 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2229 return NULL;
2232 if (cap_spapr_tce_64) {
2233 struct kvm_create_spapr_tce_64 args = {
2234 .liobn = liobn,
2235 .page_shift = page_shift,
2236 .offset = bus_offset >> page_shift,
2237 .size = nb_table,
2238 .flags = 0
2240 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2241 if (fd < 0) {
2242 fprintf(stderr,
2243 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2244 liobn);
2245 return NULL;
2247 } else if (cap_spapr_tce) {
2248 uint64_t window_size = (uint64_t) nb_table << page_shift;
2249 struct kvm_create_spapr_tce args = {
2250 .liobn = liobn,
2251 .window_size = window_size,
2253 if ((window_size != args.window_size) || bus_offset) {
2254 return NULL;
2256 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2257 if (fd < 0) {
2258 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2259 liobn);
2260 return NULL;
2262 } else {
2263 return NULL;
2266 len = nb_table * sizeof(uint64_t);
2267 /* FIXME: round this up to page size */
2269 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2270 if (table == MAP_FAILED) {
2271 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2272 liobn);
2273 close(fd);
2274 return NULL;
2277 *pfd = fd;
2278 return table;
2281 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2283 long len;
2285 if (fd < 0) {
2286 return -1;
2289 len = nb_table * sizeof(uint64_t);
2290 if ((munmap(table, len) < 0) ||
2291 (close(fd) < 0)) {
2292 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2293 strerror(errno));
2294 /* Leak the table */
2297 return 0;
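/*
 * Illustrative pairing of kvmppc_create_spapr_tce() and
 * kvmppc_remove_spapr_tce() above; the liobn, page shift and table size
 * used here are hypothetical values.
 */
static void spapr_tce_example(void)
{
    int fd;
    uint32_t nb_table = 0x10000;        /* number of TCE entries */
    void *tbl = kvmppc_create_spapr_tce(0x80000000, 12, 0, nb_table,
                                        &fd, false);

    if (tbl) {
        /* ... DMA window is programmed through the mmap()ed table ... */
        kvmppc_remove_spapr_tce(tbl, fd, nb_table);
    }
}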
2300 int kvmppc_reset_htab(int shift_hint)
2302 uint32_t shift = shift_hint;
2304 if (!kvm_enabled()) {
2305 /* Full emulation, tell caller to allocate htab itself */
2306 return 0;
2308 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2309 int ret;
2310 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2311 if (ret == -ENOTTY) {
2312 /* At least some versions of PR KVM advertise the
2313 * capability, but don't implement the ioctl(). Oops.
2314 * Return 0 so that we allocate the htab in qemu, as is
2315 * correct for PR. */
2316 return 0;
2317 } else if (ret < 0) {
2318 return ret;
2320 return shift;
2323 /* We have a kernel that predates the htab reset calls.  For PR
2324 * KVM, we need to allocate the htab ourselves; an HV KVM of this
2325 * era will already have allocated a 16MB fixed-size hash table. */
2326 if (kvmppc_is_pr(kvm_state)) {
2327 /* PR - tell caller to allocate htab */
2328 return 0;
2329 } else {
2330 /* HV - assume 16MB kernel allocated htab */
2331 return 24;
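/*
 * To summarise the contract of kvmppc_reset_htab() above: a return of 0
 * tells the caller to allocate the hash table in QEMU itself, a positive
 * return is the log2 size (shift) of a hash table the kernel has already
 * allocated or resized, and a negative return is an error code.
 */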
2335 static inline uint32_t mfpvr(void)
2337 uint32_t pvr;
2339 asm ("mfpvr %0"
2340 : "=r"(pvr));
2341 return pvr;
2344 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2346 if (on) {
2347 *word |= flags;
2348 } else {
2349 *word &= ~flags;
2353 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2355 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2356 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2357 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2359 /* Now fix up the class with information we can query from the host */
2360 pcc->pvr = mfpvr();
2362 alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2363 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2364 alter_insns(&pcc->insns_flags2, PPC2_VSX,
2365 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2366 alter_insns(&pcc->insns_flags2, PPC2_DFP,
2367 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2369 if (dcache_size != -1) {
2370 pcc->l1_dcache_size = dcache_size;
2373 if (icache_size != -1) {
2374 pcc->l1_icache_size = icache_size;
2377 #if defined(TARGET_PPC64)
2378 pcc->radix_page_info = kvm_get_radix_page_info();
2380 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2382 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2383 * compliant. More importantly, advertising ISA 3.00
2384 * architected mode may prevent guests from activating
2385 * necessary DD1 workarounds.
2387 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2388 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2390 #endif /* defined(TARGET_PPC64) */
2393 bool kvmppc_has_cap_epr(void)
2395 return cap_epr;
2398 bool kvmppc_has_cap_fixup_hcalls(void)
2400 return cap_fixup_hcalls;
2403 bool kvmppc_has_cap_htm(void)
2405 return cap_htm;
2408 bool kvmppc_has_cap_mmu_radix(void)
2410 return cap_mmu_radix;
2413 bool kvmppc_has_cap_mmu_hash_v3(void)
2415 return cap_mmu_hash_v3;
2418 static bool kvmppc_power8_host(void)
2420 bool ret = false;
2421 #ifdef TARGET_PPC64
2423 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2424 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2425 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2426 (base_pvr == CPU_POWERPC_POWER8_BASE);
2428 #endif /* TARGET_PPC64 */
2429 return ret;
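/*
 * Editorial note on the parse_cap_ppc_safe_*() helpers below: they map
 * the raw KVM_PPC_GET_CPU_CHAR character/behaviour bits onto the values
 * used by the sPAPR security capabilities.  For the cache and bounds
 * check cases, 2 appears to mean the CPU is not affected, 1 that a
 * software workaround exists, and 0 that no mitigation was detected;
 * the indirect branch case returns the specific SPAPR_CAP_FIXED_* value
 * instead.  This is an interpretation of the code, not original text.
 */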
2432 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2434 bool l1d_thread_priv_req = !kvmppc_power8_host();
2436 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2437 return 2;
2438 } else if ((!l1d_thread_priv_req ||
2439 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2440 (c.character & c.character_mask
2441 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2442 return 1;
2445 return 0;
2448 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2450 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2451 return 2;
2452 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2453 return 1;
2456 return 0;
2459 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2461 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2462 return SPAPR_CAP_FIXED_CCD;
2463 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2464 return SPAPR_CAP_FIXED_IBS;
2467 return 0;
2470 static void kvmppc_get_cpu_characteristics(KVMState *s)
2472 struct kvm_ppc_cpu_char c;
2473 int ret;
2475 /* Assume broken */
2476 cap_ppc_safe_cache = 0;
2477 cap_ppc_safe_bounds_check = 0;
2478 cap_ppc_safe_indirect_branch = 0;
2480 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2481 if (!ret) {
2482 return;
2484 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2485 if (ret < 0) {
2486 return;
2489 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2490 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2491 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
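/*
 * The values cached above are exposed through the kvmppc_get_cap_safe_*()
 * accessors below; presumably the machine-level security capabilities use
 * them to decide what to advertise to the guest (an assumption, since the
 * consumers live outside this file).
 */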
2494 int kvmppc_get_cap_safe_cache(void)
2496 return cap_ppc_safe_cache;
2499 int kvmppc_get_cap_safe_bounds_check(void)
2501 return cap_ppc_safe_bounds_check;
2504 int kvmppc_get_cap_safe_indirect_branch(void)
2506 return cap_ppc_safe_indirect_branch;
2509 bool kvmppc_has_cap_spapr_vfio(void)
2511 return cap_spapr_vfio;
2514 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2516 uint32_t host_pvr = mfpvr();
2517 PowerPCCPUClass *pvr_pcc;
2519 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2520 if (pvr_pcc == NULL) {
2521 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2524 return pvr_pcc;
2527 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2529 TypeInfo type_info = {
2530 .name = TYPE_HOST_POWERPC_CPU,
2531 .class_init = kvmppc_host_cpu_class_init,
2533 MachineClass *mc = MACHINE_GET_CLASS(ms);
2534 PowerPCCPUClass *pvr_pcc;
2535 ObjectClass *oc;
2536 DeviceClass *dc;
2537 int i;
2539 pvr_pcc = kvm_ppc_get_host_cpu_class();
2540 if (pvr_pcc == NULL) {
2541 return -1;
2543 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2544 type_register(&type_info);
2545 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2546 /* override TCG default cpu type with 'host' cpu model */
2547 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2550 oc = object_class_by_name(type_info.name);
2551 g_assert(oc);
2554 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2555 * we want "POWER8" to be a "family" alias that points to the current
2556 * host CPU type, too)
2558 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2559 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2560 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2561 char *suffix;
2563 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2564 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2565 if (suffix) {
2566 *suffix = 0;
2568 break;
2572 return 0;
2575 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2577 struct kvm_rtas_token_args args = {
2578 .token = token,
2581 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2582 return -ENOENT;
2585 strncpy(args.name, function, sizeof(args.name));
2587 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
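/*
 * Hypothetical registration sketch: hand an RTAS call off to the kernel
 * when supported, otherwise keep handling it in QEMU.  The token value
 * and the choice of "ibm,set-xive" are illustrative only.
 */
static void register_rtas_token_example(void)
{
    if (kvmppc_define_rtas_kernel_token(0x2001, "ibm,set-xive") < 0) {
        /* KVM lacks KVM_CAP_PPC_RTAS; QEMU must service the call itself */
    }
}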
2590 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2592 struct kvm_get_htab_fd s = {
2593 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2594 .start_index = index,
2596 int ret;
2598 if (!cap_htab_fd) {
2599 error_setg(errp, "KVM version doesn't support %s the HPT",
2600 write ? "writing" : "reading");
2601 return -ENOTSUP;
2604 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2605 if (ret < 0) {
2606 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2607 write ? "writing" : "reading", write ? "to" : "from",
2608 strerror(errno));
2609 return -errno;
2612 return ret;
2615 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2617 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2618 uint8_t buf[bufsize];
2619 ssize_t rc;
2621 do {
2622 rc = read(fd, buf, bufsize);
2623 if (rc < 0) {
2624 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2625 strerror(errno));
2626 return rc;
2627 } else if (rc) {
2628 uint8_t *buffer = buf;
2629 ssize_t n = rc;
2630 while (n) {
2631 struct kvm_get_htab_header *head =
2632 (struct kvm_get_htab_header *) buffer;
2633 size_t chunksize = sizeof(*head) +
2634 HASH_PTE_SIZE_64 * head->n_valid;
2636 qemu_put_be32(f, head->index);
2637 qemu_put_be16(f, head->n_valid);
2638 qemu_put_be16(f, head->n_invalid);
2639 qemu_put_buffer(f, (void *)(head + 1),
2640 HASH_PTE_SIZE_64 * head->n_valid);
2642 buffer += chunksize;
2643 n -= chunksize;
2646 } while ((rc != 0)
2647 && ((max_ns < 0)
2648 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2650 return (rc == 0) ? 1 : 0;
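/*
 * For reference, the chunk layout produced above and consumed by
 * kvmppc_load_htab_chunk() below is, per chunk: a 32-bit big-endian HPTE
 * index, a 16-bit count of valid entries, a 16-bit count of invalid
 * entries, followed by n_valid hash PTEs of HASH_PTE_SIZE_64 bytes each.
 */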
2653 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2654 uint16_t n_valid, uint16_t n_invalid)
2656 struct kvm_get_htab_header *buf;
2657 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2658 ssize_t rc;
2660 buf = alloca(chunksize);
2661 buf->index = index;
2662 buf->n_valid = n_valid;
2663 buf->n_invalid = n_invalid;
2665 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2667 rc = write(fd, buf, chunksize);
2668 if (rc < 0) {
2669 fprintf(stderr, "Error writing KVM hash table: %s\n",
2670 strerror(errno));
2671 return rc;
2673 if (rc != chunksize) {
2674 /* We should never get a short write on a single chunk */
2675 fprintf(stderr, "Short write, restoring KVM hash table\n");
2676 return -1;
2678 return 0;
2681 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2683 return true;
2686 void kvm_arch_init_irq_routing(KVMState *s)
2690 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2692 int fd, rc;
2693 int i;
2695 fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2697 i = 0;
2698 while (i < n) {
2699 struct kvm_get_htab_header *hdr;
2700 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2701 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2703 rc = read(fd, buf, sizeof(buf));
2704 if (rc < 0) {
2705 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2708 hdr = (struct kvm_get_htab_header *)buf;
2709 while ((i < n) && ((char *)hdr < (buf + rc))) {
2710 int invalid = hdr->n_invalid, valid = hdr->n_valid;
2712 if (hdr->index != (ptex + i)) {
2713 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2714 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2717 if (n - i < valid) {
2718 valid = n - i;
2720 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2721 i += valid;
2723 if ((n - i) < invalid) {
2724 invalid = n - i;
2726 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2727 i += invalid;
2729 hdr = (struct kvm_get_htab_header *)
2730 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2734 close(fd);
2737 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2739 int fd, rc;
2740 struct {
2741 struct kvm_get_htab_header hdr;
2742 uint64_t pte0;
2743 uint64_t pte1;
2744 } buf;
2746 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2748 buf.hdr.n_valid = 1;
2749 buf.hdr.n_invalid = 0;
2750 buf.hdr.index = ptex;
2751 buf.pte0 = cpu_to_be64(pte0);
2752 buf.pte1 = cpu_to_be64(pte1);
2754 rc = write(fd, &buf, sizeof(buf));
2755 if (rc != sizeof(buf)) {
2756 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2758 close(fd);
2761 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2762 uint64_t address, uint32_t data, PCIDevice *dev)
2764 return 0;
2767 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2768 int vector, PCIDevice *dev)
2770 return 0;
2773 int kvm_arch_release_virq_post(int virq)
2775 return 0;
2778 int kvm_arch_msi_data_to_gsi(uint32_t data)
2780 return data & 0xffff;
2783 int kvmppc_enable_hwrng(void)
2785 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2786 return -1;
2789 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2792 void kvmppc_check_papr_resize_hpt(Error **errp)
2794 if (!kvm_enabled()) {
2795 return; /* No KVM, we're good */
2798 if (cap_resize_hpt) {
2799 return; /* Kernel has explicit support, we're good */
2802 /* Otherwise fallback on looking for PR KVM */
2803 if (kvmppc_is_pr(kvm_state)) {
2804 return;
2807 error_setg(errp,
2808 "Hash page table resizing not available with this KVM version");
2811 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2813 CPUState *cs = CPU(cpu);
2814 struct kvm_ppc_resize_hpt rhpt = {
2815 .flags = flags,
2816 .shift = shift,
2819 if (!cap_resize_hpt) {
2820 return -ENOSYS;
2823 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2826 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2828 CPUState *cs = CPU(cpu);
2829 struct kvm_ppc_resize_hpt rhpt = {
2830 .flags = flags,
2831 .shift = shift,
2834 if (!cap_resize_hpt) {
2835 return -ENOSYS;
2838 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
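/*
 * The two helpers above mirror the guest-visible H_RESIZE_HPT_PREPARE /
 * H_RESIZE_HPT_COMMIT two-phase protocol: the caller is expected to issue
 * the prepare step (typically retrying while the kernel reports that the
 * new hash table is still being built) and only then commit the switch.
 * The retry behaviour described here is an assumption about the callers,
 * which live outside this file.
 */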
2842 * This is a helper function to detect a post migration scenario
2843 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2844 * the guest kernel can't handle a PVR value other than the actual host
2845 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2847 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2848 * (so, we're HV), return true. The workaround itself is done in
2849 * cpu_post_load.
2851 * The order here is important: we'll only check for KVM PR as a
2852 * fallback if the guest kernel can't handle the situation itself.
2853 * We want to avoid querying the running KVM type at the QEMU level
2854 * as much as possible.
2856 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2858 CPUState *cs = CPU(cpu);
2860 if (!kvm_enabled()) {
2861 return false;
2864 if (cap_ppc_pvr_compat) {
2865 return false;
2868 return !kvmppc_is_pr(cs->kvm_state);