Merge remote-tracking branch 'qemu/master'
[qemu/ar7.git] / target-ppc / kvm.c
blob6843fa0b98a8b913962cf9f285a76122ea64983b
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
41 #include "exec/gdbstub.h"
/* Uncomment the next line to make DPRINTF() print to stderr. */
//#define DEBUG_KVM

#ifndef DEBUG_KVM
/* Debugging disabled: DPRINTF() expands to an empty statement. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#else
/* Debugging enabled: DPRINTF() forwards its arguments to fprintf(stderr). */
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#endif
53 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
55 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
56 KVM_CAP_LAST_INFO
/*
 * Cached capability values.  Most of them are filled in by kvm_arch_init()
 * from kvm_check_extension(); file-scope statics start out as zero.
 */
static int cap_interrupt_unset;
static int cap_interrupt_level;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Value read from KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu(). */
static uint32_t debug_inst_opcode;
78 /* XXX We have a race condition where we actually have a level triggered
79 * interrupt, but the infrastructure can't expose that yet, so the guest
80 * takes but ignores it, goes to sleep and never gets notified that there's
81 * still an interrupt pending.
83 * As a quick workaround, let's just wake up again 20 ms after we injected
84 * an interrupt. That way we can assure that we're always reinjecting
85 * interrupts in case the guest swallowed them.
87 static QEMUTimer *idle_timer;
89 static void kvm_kick_cpu(void *opaque)
91 PowerPCCPU *cpu = opaque;
93 qemu_cpu_kick(CPU(cpu));
96 static int kvm_ppc_register_host_cpu_type(void);
98 int kvm_arch_init(KVMState *s)
100 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
101 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
102 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
103 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
104 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
105 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
106 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
107 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
108 cap_spapr_vfio = false;
109 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
110 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
111 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
112 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
113 /* Note: we don't set cap_papr here, because this capability is
114 * only activated after this by kvmppc_set_papr() */
115 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
116 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
118 if (!cap_interrupt_level) {
119 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
120 "VM to stall at times!\n");
123 kvm_ppc_register_host_cpu_type();
125 return 0;
128 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
130 CPUPPCState *cenv = &cpu->env;
131 CPUState *cs = CPU(cpu);
132 struct kvm_sregs sregs;
133 int ret;
135 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
136 /* What we're really trying to say is "if we're on BookE, we use
137 the native PVR for now". This is the only sane way to check
138 it though, so we potentially confuse users that they can run
139 BookE guests on BookS. Let's hope nobody dares enough :) */
140 return 0;
141 } else {
142 if (!cap_segstate) {
143 fprintf(stderr, "kvm error: missing PVR setting capability\n");
144 return -ENOSYS;
148 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
149 if (ret) {
150 return ret;
153 sregs.pvr = cenv->spr[SPR_PVR];
154 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
157 /* Set up a shared TLB array with KVM */
158 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
160 CPUPPCState *env = &cpu->env;
161 CPUState *cs = CPU(cpu);
162 struct kvm_book3e_206_tlb_params params = {};
163 struct kvm_config_tlb cfg = {};
164 unsigned int entries = 0;
165 int ret, i;
167 if (!kvm_enabled() ||
168 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
169 return 0;
172 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
174 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
175 params.tlb_sizes[i] = booke206_tlb_size(env, i);
176 params.tlb_ways[i] = booke206_tlb_ways(env, i);
177 entries += params.tlb_sizes[i];
180 assert(entries == env->nb_tlb);
181 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
183 env->tlb_dirty = true;
185 cfg.array = (uintptr_t)env->tlb.tlbm;
186 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
187 cfg.params = (uintptr_t)&params;
188 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
190 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
191 if (ret < 0) {
192 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
193 __func__, strerror(-ret));
194 return ret;
197 env->kvm_sw_tlb = true;
198 return 0;
202 #if defined(TARGET_PPC64)
203 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
204 struct kvm_ppc_smmu_info *info)
206 CPUPPCState *env = &cpu->env;
207 CPUState *cs = CPU(cpu);
209 memset(info, 0, sizeof(*info));
211 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
212 * need to "guess" what the supported page sizes are.
214 * For that to work we make a few assumptions:
216 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
217 * KVM which only supports 4K and 16M pages, but supports them
218 * regardless of the backing store characteritics. We also don't
219 * support 1T segments.
221 * This is safe as if HV KVM ever supports that capability or PR
222 * KVM grows supports for more page/segment sizes, those versions
223 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
224 * will not hit this fallback
226 * - Else we are running HV KVM. This means we only support page
227 * sizes that fit in the backing store. Additionally we only
228 * advertize 64K pages if the processor is ARCH 2.06 and we assume
229 * P7 encodings for the SLB and hash table. Here too, we assume
230 * support for any newer processor will mean a kernel that
231 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
232 * this fallback.
234 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
235 /* No flags */
236 info->flags = 0;
237 info->slb_size = 64;
239 /* Standard 4k base page size segment */
240 info->sps[0].page_shift = 12;
241 info->sps[0].slb_enc = 0;
242 info->sps[0].enc[0].page_shift = 12;
243 info->sps[0].enc[0].pte_enc = 0;
245 /* Standard 16M large page size segment */
246 info->sps[1].page_shift = 24;
247 info->sps[1].slb_enc = SLB_VSID_L;
248 info->sps[1].enc[0].page_shift = 24;
249 info->sps[1].enc[0].pte_enc = 0;
250 } else {
251 int i = 0;
253 /* HV KVM has backing store size restrictions */
254 info->flags = KVM_PPC_PAGE_SIZES_REAL;
256 if (env->mmu_model & POWERPC_MMU_1TSEG) {
257 info->flags |= KVM_PPC_1T_SEGMENTS;
260 if (env->mmu_model == POWERPC_MMU_2_06) {
261 info->slb_size = 32;
262 } else {
263 info->slb_size = 64;
266 /* Standard 4k base page size segment */
267 info->sps[i].page_shift = 12;
268 info->sps[i].slb_enc = 0;
269 info->sps[i].enc[0].page_shift = 12;
270 info->sps[i].enc[0].pte_enc = 0;
271 i++;
273 /* 64K on MMU 2.06 */
274 if (env->mmu_model == POWERPC_MMU_2_06) {
275 info->sps[i].page_shift = 16;
276 info->sps[i].slb_enc = 0x110;
277 info->sps[i].enc[0].page_shift = 16;
278 info->sps[i].enc[0].pte_enc = 1;
279 i++;
282 /* Standard 16M large page size segment */
283 info->sps[i].page_shift = 24;
284 info->sps[i].slb_enc = SLB_VSID_L;
285 info->sps[i].enc[0].page_shift = 24;
286 info->sps[i].enc[0].pte_enc = 0;
290 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
292 CPUState *cs = CPU(cpu);
293 int ret;
295 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
296 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
297 if (ret == 0) {
298 return;
302 kvm_get_fallback_smmu_info(cpu, info);
305 static long getrampagesize(void)
307 struct statfs fs;
308 int ret;
310 if (!mem_path) {
311 /* guest RAM is backed by normal anonymous pages */
312 return getpagesize();
315 do {
316 ret = statfs(mem_path, &fs);
317 } while (ret != 0 && errno == EINTR);
319 if (ret != 0) {
320 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
321 strerror(errno));
322 exit(1);
325 #define HUGETLBFS_MAGIC 0x958458f6
327 if (fs.f_type != HUGETLBFS_MAGIC) {
328 /* Explicit mempath, but it's ordinary pages */
329 return getpagesize();
332 /* It's hugepage, return the huge page size */
333 return fs.f_bsize;
336 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
338 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
339 return true;
342 return (1ul << shift) <= rampgsize;
345 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
347 static struct kvm_ppc_smmu_info smmu_info;
348 static bool has_smmu_info;
349 CPUPPCState *env = &cpu->env;
350 long rampagesize;
351 int iq, ik, jq, jk;
353 /* We only handle page sizes for 64-bit server guests for now */
354 if (!(env->mmu_model & POWERPC_MMU_64)) {
355 return;
358 /* Collect MMU info from kernel if not already */
359 if (!has_smmu_info) {
360 kvm_get_smmu_info(cpu, &smmu_info);
361 has_smmu_info = true;
364 rampagesize = getrampagesize();
366 /* Convert to QEMU form */
367 memset(&env->sps, 0, sizeof(env->sps));
370 * XXX This loop should be an entry wide AND of the capabilities that
371 * the selected CPU has with the capabilities that KVM supports.
373 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
374 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
375 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
377 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
378 ksps->page_shift)) {
379 continue;
381 qsps->page_shift = ksps->page_shift;
382 qsps->slb_enc = ksps->slb_enc;
383 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
384 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
385 ksps->enc[jk].page_shift)) {
386 continue;
388 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
389 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
390 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
391 break;
394 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
395 break;
398 env->slb_nr = smmu_info.slb_size;
399 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
400 env->mmu_model &= ~POWERPC_MMU_1TSEG;
403 #else /* defined (TARGET_PPC64) */
405 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
409 #endif /* !defined (TARGET_PPC64) */
411 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
413 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
416 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
417 * book3s supports only 1 watchpoint, so array size
418 * of 4 is sufficient for now.
420 #define MAX_HW_BKPTS 4
422 static struct HWBreakpoint {
423 target_ulong addr;
424 int type;
425 } hw_debug_points[MAX_HW_BKPTS];
427 static CPUWatchpoint hw_watchpoint;
429 /* Default there is no breakpoint and watchpoint supported */
430 static int max_hw_breakpoint;
431 static int max_hw_watchpoint;
432 static int nb_hw_breakpoint;
433 static int nb_hw_watchpoint;
435 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
437 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
438 max_hw_breakpoint = 2;
439 max_hw_watchpoint = 2;
442 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
443 fprintf(stderr, "Error initializing h/w breakpoints\n");
444 return;
448 int kvm_arch_init_vcpu(CPUState *cs)
450 PowerPCCPU *cpu = POWERPC_CPU(cs);
451 CPUPPCState *cenv = &cpu->env;
452 int ret;
454 /* Gather server mmu info from KVM and update the CPU state */
455 kvm_fixup_page_sizes(cpu);
457 /* Synchronize sregs with kvm */
458 ret = kvm_arch_sync_sregs(cpu);
459 if (ret) {
460 return ret;
463 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
465 /* Some targets support access to KVM's guest TLB. */
466 switch (cenv->mmu_model) {
467 case POWERPC_MMU_BOOKE206:
468 ret = kvm_booke206_tlb_init(cpu);
469 break;
470 default:
471 break;
474 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
475 kvmppc_hw_debug_points_init(cenv);
477 return ret;
480 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
482 CPUPPCState *env = &cpu->env;
483 CPUState *cs = CPU(cpu);
484 struct kvm_dirty_tlb dirty_tlb;
485 unsigned char *bitmap;
486 int ret;
488 if (!env->kvm_sw_tlb) {
489 return;
492 bitmap = g_malloc((env->nb_tlb + 7) / 8);
493 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
495 dirty_tlb.bitmap = (uintptr_t)bitmap;
496 dirty_tlb.num_dirty = env->nb_tlb;
498 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
499 if (ret) {
500 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
501 __func__, strerror(-ret));
504 g_free(bitmap);
507 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
509 PowerPCCPU *cpu = POWERPC_CPU(cs);
510 CPUPPCState *env = &cpu->env;
511 union {
512 uint32_t u32;
513 uint64_t u64;
514 } val;
515 struct kvm_one_reg reg = {
516 .id = id,
517 .addr = (uintptr_t) &val,
519 int ret;
521 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
522 if (ret != 0) {
523 trace_kvm_failed_spr_get(spr, strerror(errno));
524 } else {
525 switch (id & KVM_REG_SIZE_MASK) {
526 case KVM_REG_SIZE_U32:
527 env->spr[spr] = val.u32;
528 break;
530 case KVM_REG_SIZE_U64:
531 env->spr[spr] = val.u64;
532 break;
534 default:
535 /* Don't handle this size yet */
536 abort();
541 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
543 PowerPCCPU *cpu = POWERPC_CPU(cs);
544 CPUPPCState *env = &cpu->env;
545 union {
546 uint32_t u32;
547 uint64_t u64;
548 } val;
549 struct kvm_one_reg reg = {
550 .id = id,
551 .addr = (uintptr_t) &val,
553 int ret;
555 switch (id & KVM_REG_SIZE_MASK) {
556 case KVM_REG_SIZE_U32:
557 val.u32 = env->spr[spr];
558 break;
560 case KVM_REG_SIZE_U64:
561 val.u64 = env->spr[spr];
562 break;
564 default:
565 /* Don't handle this size yet */
566 abort();
569 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
570 if (ret != 0) {
571 trace_kvm_failed_spr_set(spr, strerror(errno));
575 static int kvm_put_fp(CPUState *cs)
577 PowerPCCPU *cpu = POWERPC_CPU(cs);
578 CPUPPCState *env = &cpu->env;
579 struct kvm_one_reg reg;
580 int i;
581 int ret;
583 if (env->insns_flags & PPC_FLOAT) {
584 uint64_t fpscr = env->fpscr;
585 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
587 reg.id = KVM_REG_PPC_FPSCR;
588 reg.addr = (uintptr_t)&fpscr;
589 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
590 if (ret < 0) {
591 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
592 return ret;
595 for (i = 0; i < 32; i++) {
596 uint64_t vsr[2];
598 vsr[0] = float64_val(env->fpr[i]);
599 vsr[1] = env->vsr[i];
600 reg.addr = (uintptr_t) &vsr;
601 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
603 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
604 if (ret < 0) {
605 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
606 i, strerror(errno));
607 return ret;
612 if (env->insns_flags & PPC_ALTIVEC) {
613 reg.id = KVM_REG_PPC_VSCR;
614 reg.addr = (uintptr_t)&env->vscr;
615 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
616 if (ret < 0) {
617 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
618 return ret;
621 for (i = 0; i < 32; i++) {
622 reg.id = KVM_REG_PPC_VR(i);
623 reg.addr = (uintptr_t)&env->avr[i];
624 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
625 if (ret < 0) {
626 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
627 return ret;
632 return 0;
635 static int kvm_get_fp(CPUState *cs)
637 PowerPCCPU *cpu = POWERPC_CPU(cs);
638 CPUPPCState *env = &cpu->env;
639 struct kvm_one_reg reg;
640 int i;
641 int ret;
643 if (env->insns_flags & PPC_FLOAT) {
644 uint64_t fpscr;
645 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
647 reg.id = KVM_REG_PPC_FPSCR;
648 reg.addr = (uintptr_t)&fpscr;
649 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
650 if (ret < 0) {
651 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
652 return ret;
653 } else {
654 env->fpscr = fpscr;
657 for (i = 0; i < 32; i++) {
658 uint64_t vsr[2];
660 reg.addr = (uintptr_t) &vsr;
661 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
663 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
664 if (ret < 0) {
665 DPRINTF("Unable to get %s%d from KVM: %s\n",
666 vsx ? "VSR" : "FPR", i, strerror(errno));
667 return ret;
668 } else {
669 env->fpr[i] = vsr[0];
670 if (vsx) {
671 env->vsr[i] = vsr[1];
677 if (env->insns_flags & PPC_ALTIVEC) {
678 reg.id = KVM_REG_PPC_VSCR;
679 reg.addr = (uintptr_t)&env->vscr;
680 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
681 if (ret < 0) {
682 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
683 return ret;
686 for (i = 0; i < 32; i++) {
687 reg.id = KVM_REG_PPC_VR(i);
688 reg.addr = (uintptr_t)&env->avr[i];
689 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
690 if (ret < 0) {
691 DPRINTF("Unable to get VR%d from KVM: %s\n",
692 i, strerror(errno));
693 return ret;
698 return 0;
701 #if defined(TARGET_PPC64)
702 static int kvm_get_vpa(CPUState *cs)
704 PowerPCCPU *cpu = POWERPC_CPU(cs);
705 CPUPPCState *env = &cpu->env;
706 struct kvm_one_reg reg;
707 int ret;
709 reg.id = KVM_REG_PPC_VPA_ADDR;
710 reg.addr = (uintptr_t)&env->vpa_addr;
711 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
712 if (ret < 0) {
713 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
714 return ret;
717 assert((uintptr_t)&env->slb_shadow_size
718 == ((uintptr_t)&env->slb_shadow_addr + 8));
719 reg.id = KVM_REG_PPC_VPA_SLB;
720 reg.addr = (uintptr_t)&env->slb_shadow_addr;
721 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
722 if (ret < 0) {
723 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
724 strerror(errno));
725 return ret;
728 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
729 reg.id = KVM_REG_PPC_VPA_DTL;
730 reg.addr = (uintptr_t)&env->dtl_addr;
731 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
732 if (ret < 0) {
733 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
734 strerror(errno));
735 return ret;
738 return 0;
741 static int kvm_put_vpa(CPUState *cs)
743 PowerPCCPU *cpu = POWERPC_CPU(cs);
744 CPUPPCState *env = &cpu->env;
745 struct kvm_one_reg reg;
746 int ret;
748 /* SLB shadow or DTL can't be registered unless a master VPA is
749 * registered. That means when restoring state, if a VPA *is*
750 * registered, we need to set that up first. If not, we need to
751 * deregister the others before deregistering the master VPA */
752 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
754 if (env->vpa_addr) {
755 reg.id = KVM_REG_PPC_VPA_ADDR;
756 reg.addr = (uintptr_t)&env->vpa_addr;
757 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
758 if (ret < 0) {
759 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
760 return ret;
764 assert((uintptr_t)&env->slb_shadow_size
765 == ((uintptr_t)&env->slb_shadow_addr + 8));
766 reg.id = KVM_REG_PPC_VPA_SLB;
767 reg.addr = (uintptr_t)&env->slb_shadow_addr;
768 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
769 if (ret < 0) {
770 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
771 return ret;
774 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
775 reg.id = KVM_REG_PPC_VPA_DTL;
776 reg.addr = (uintptr_t)&env->dtl_addr;
777 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
778 if (ret < 0) {
779 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
780 strerror(errno));
781 return ret;
784 if (!env->vpa_addr) {
785 reg.id = KVM_REG_PPC_VPA_ADDR;
786 reg.addr = (uintptr_t)&env->vpa_addr;
787 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
788 if (ret < 0) {
789 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
790 return ret;
794 return 0;
796 #endif /* TARGET_PPC64 */
798 int kvm_arch_put_registers(CPUState *cs, int level)
800 PowerPCCPU *cpu = POWERPC_CPU(cs);
801 CPUPPCState *env = &cpu->env;
802 struct kvm_regs regs;
803 int ret;
804 int i;
806 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
807 if (ret < 0) {
808 return ret;
811 regs.ctr = env->ctr;
812 regs.lr = env->lr;
813 regs.xer = cpu_read_xer(env);
814 regs.msr = env->msr;
815 regs.pc = env->nip;
817 regs.srr0 = env->spr[SPR_SRR0];
818 regs.srr1 = env->spr[SPR_SRR1];
820 regs.sprg0 = env->spr[SPR_SPRG0];
821 regs.sprg1 = env->spr[SPR_SPRG1];
822 regs.sprg2 = env->spr[SPR_SPRG2];
823 regs.sprg3 = env->spr[SPR_SPRG3];
824 regs.sprg4 = env->spr[SPR_SPRG4];
825 regs.sprg5 = env->spr[SPR_SPRG5];
826 regs.sprg6 = env->spr[SPR_SPRG6];
827 regs.sprg7 = env->spr[SPR_SPRG7];
829 regs.pid = env->spr[SPR_BOOKE_PID];
831 for (i = 0;i < 32; i++)
832 regs.gpr[i] = env->gpr[i];
834 regs.cr = 0;
835 for (i = 0; i < 8; i++) {
836 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
839 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
840 if (ret < 0)
841 return ret;
843 kvm_put_fp(cs);
845 if (env->tlb_dirty) {
846 kvm_sw_tlb_put(cpu);
847 env->tlb_dirty = false;
850 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
851 struct kvm_sregs sregs;
853 sregs.pvr = env->spr[SPR_PVR];
855 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
857 /* Sync SLB */
858 #ifdef TARGET_PPC64
859 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
860 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
861 if (env->slb[i].esid & SLB_ESID_V) {
862 sregs.u.s.ppc64.slb[i].slbe |= i;
864 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
866 #endif
868 /* Sync SRs */
869 for (i = 0; i < 16; i++) {
870 sregs.u.s.ppc32.sr[i] = env->sr[i];
873 /* Sync BATs */
874 for (i = 0; i < 8; i++) {
875 /* Beware. We have to swap upper and lower bits here */
876 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
877 | env->DBAT[1][i];
878 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
879 | env->IBAT[1][i];
882 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
883 if (ret) {
884 return ret;
888 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
889 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
892 if (cap_one_reg) {
893 int i;
895 /* We deliberately ignore errors here, for kernels which have
896 * the ONE_REG calls, but don't support the specific
897 * registers, there's a reasonable chance things will still
898 * work, at least until we try to migrate. */
899 for (i = 0; i < 1024; i++) {
900 uint64_t id = env->spr_cb[i].one_reg_id;
902 if (id != 0) {
903 kvm_put_one_spr(cs, id, i);
907 #ifdef TARGET_PPC64
908 if (msr_ts) {
909 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
910 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
912 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
913 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
915 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
916 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
917 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
918 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
919 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
920 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
921 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
922 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
923 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
924 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
927 if (cap_papr) {
928 if (kvm_put_vpa(cs) < 0) {
929 DPRINTF("Warning: Unable to set VPA information to KVM\n");
933 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
934 #endif /* TARGET_PPC64 */
937 return ret;
940 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
942 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
945 int kvm_arch_get_registers(CPUState *cs)
947 PowerPCCPU *cpu = POWERPC_CPU(cs);
948 CPUPPCState *env = &cpu->env;
949 struct kvm_regs regs;
950 struct kvm_sregs sregs;
951 uint32_t cr;
952 int i, ret;
954 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
955 if (ret < 0)
956 return ret;
958 cr = regs.cr;
959 for (i = 7; i >= 0; i--) {
960 env->crf[i] = cr & 15;
961 cr >>= 4;
964 env->ctr = regs.ctr;
965 env->lr = regs.lr;
966 cpu_write_xer(env, regs.xer);
967 env->msr = regs.msr;
968 env->nip = regs.pc;
970 env->spr[SPR_SRR0] = regs.srr0;
971 env->spr[SPR_SRR1] = regs.srr1;
973 env->spr[SPR_SPRG0] = regs.sprg0;
974 env->spr[SPR_SPRG1] = regs.sprg1;
975 env->spr[SPR_SPRG2] = regs.sprg2;
976 env->spr[SPR_SPRG3] = regs.sprg3;
977 env->spr[SPR_SPRG4] = regs.sprg4;
978 env->spr[SPR_SPRG5] = regs.sprg5;
979 env->spr[SPR_SPRG6] = regs.sprg6;
980 env->spr[SPR_SPRG7] = regs.sprg7;
982 env->spr[SPR_BOOKE_PID] = regs.pid;
984 for (i = 0;i < 32; i++)
985 env->gpr[i] = regs.gpr[i];
987 kvm_get_fp(cs);
989 if (cap_booke_sregs) {
990 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
991 if (ret < 0) {
992 return ret;
995 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
996 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
997 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
998 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
999 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1000 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1001 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1002 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1003 env->spr[SPR_DECR] = sregs.u.e.dec;
1004 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1005 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1006 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1009 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1010 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1011 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1012 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1013 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1014 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1017 if (sregs.u.e.features & KVM_SREGS_E_64) {
1018 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1021 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1022 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1025 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1026 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1027 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1028 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1029 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1030 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1031 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1032 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1033 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1034 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1035 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1036 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1037 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1038 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1039 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1040 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1041 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1042 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1043 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1044 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1045 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1046 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1047 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1048 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1049 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1050 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1051 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1052 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1053 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1054 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1055 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1056 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1057 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1059 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1060 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1061 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1062 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1063 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1064 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1065 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1068 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1069 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1070 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1073 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1074 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1075 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1076 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1077 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1081 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1082 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1083 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1084 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1085 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1086 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1087 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1088 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1089 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1090 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1091 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1094 if (sregs.u.e.features & KVM_SREGS_EXP) {
1095 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1098 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1099 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1100 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1103 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1104 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1105 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1106 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1108 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1109 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1110 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1115 if (cap_segstate) {
1116 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1117 if (ret < 0) {
1118 return ret;
1121 if (!env->external_htab) {
1122 ppc_store_sdr1(env, sregs.u.s.sdr1);
1125 /* Sync SLB */
1126 #ifdef TARGET_PPC64
1128 * The packed SLB array we get from KVM_GET_SREGS only contains
1129 * information about valid entries. So we flush our internal
1130 * copy to get rid of stale ones, then put all valid SLB entries
1131 * back in.
1133 memset(env->slb, 0, sizeof(env->slb));
1134 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1135 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1136 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1138 * Only restore valid entries
1140 if (rb & SLB_ESID_V) {
1141 ppc_store_slb(env, rb, rs);
1144 #endif
1146 /* Sync SRs */
1147 for (i = 0; i < 16; i++) {
1148 env->sr[i] = sregs.u.s.ppc32.sr[i];
1151 /* Sync BATs */
1152 for (i = 0; i < 8; i++) {
1153 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1154 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1155 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1156 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1160 if (cap_hior) {
1161 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1164 if (cap_one_reg) {
1165 int i;
1167 /* We deliberately ignore errors here, for kernels which have
1168 * the ONE_REG calls, but don't support the specific
1169 * registers, there's a reasonable chance things will still
1170 * work, at least until we try to migrate. */
1171 for (i = 0; i < 1024; i++) {
1172 uint64_t id = env->spr_cb[i].one_reg_id;
1174 if (id != 0) {
1175 kvm_get_one_spr(cs, id, i);
1179 #ifdef TARGET_PPC64
1180 if (msr_ts) {
1181 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1182 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1184 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1185 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1187 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1188 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1189 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1190 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1191 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1192 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1193 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1194 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1195 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1196 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1199 if (cap_papr) {
1200 if (kvm_get_vpa(cs) < 0) {
1201 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1205 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1206 #endif
1209 return 0;
1212 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1214 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1216 if (irq != PPC_INTERRUPT_EXT) {
1217 return 0;
1220 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1221 return 0;
1224 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1226 return 0;
1229 #if defined(TARGET_PPCEMB)
1230 #define PPC_INPUT_INT PPC40x_INPUT_INT
1231 #elif defined(TARGET_PPC64)
1232 #define PPC_INPUT_INT PPC970_INPUT_INT
1233 #else
1234 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1235 #endif
1237 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1239 PowerPCCPU *cpu = POWERPC_CPU(cs);
1240 CPUPPCState *env = &cpu->env;
1241 int r;
1242 unsigned irq;
1244 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1245 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1246 if (!cap_interrupt_level &&
1247 run->ready_for_interrupt_injection &&
1248 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1249 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1251 /* For now KVM disregards the 'irq' argument. However, in the
1252 * future KVM could cache it in-kernel to avoid a heavyweight exit
1253 * when reading the UIC.
1255 irq = KVM_INTERRUPT_SET;
1257 DPRINTF("injected interrupt %d\n", irq);
1258 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1259 if (r < 0) {
1260 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1263 /* Always wake up soon in case the interrupt was level based */
1264 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1265 (get_ticks_per_sec() / 50));
1268 /* We don't know if there are more interrupts pending after this. However,
1269 * the guest will return to userspace in the course of handling this one
1270 * anyways, so we will get a chance to deliver the rest. */
1273 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1277 int kvm_arch_process_async_events(CPUState *cs)
1279 return cs->halted;
1282 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1284 CPUState *cs = CPU(cpu);
1285 CPUPPCState *env = &cpu->env;
1287 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1288 cs->halted = 1;
1289 cs->exception_index = EXCP_HLT;
1292 return 0;
1295 /* map dcr access to existing qemu dcr emulation */
1296 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1298 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1299 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1301 return 0;
1304 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1306 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1307 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1309 return 0;
1312 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1314 /* Mixed endian case is not handled */
1315 uint32_t sc = debug_inst_opcode;
1317 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1318 sizeof(sc), 0) ||
1319 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1320 return -EINVAL;
1323 return 0;
1326 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1328 uint32_t sc;
1330 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1331 sc != debug_inst_opcode ||
1332 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1333 sizeof(sc), 1)) {
1334 return -EINVAL;
1337 return 0;
1340 static int find_hw_breakpoint(target_ulong addr, int type)
1342 int n;
1344 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1345 <= ARRAY_SIZE(hw_debug_points));
1347 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1348 if (hw_debug_points[n].addr == addr &&
1349 hw_debug_points[n].type == type) {
1350 return n;
1354 return -1;
1357 static int find_hw_watchpoint(target_ulong addr, int *flag)
1359 int n;
1361 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1362 if (n >= 0) {
1363 *flag = BP_MEM_ACCESS;
1364 return n;
1367 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1368 if (n >= 0) {
1369 *flag = BP_MEM_WRITE;
1370 return n;
1373 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1374 if (n >= 0) {
1375 *flag = BP_MEM_READ;
1376 return n;
1379 return -1;
1382 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1383 target_ulong len, int type)
1385 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1386 return -ENOBUFS;
1389 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1390 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1392 switch (type) {
1393 case GDB_BREAKPOINT_HW:
1394 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1395 return -ENOBUFS;
1398 if (find_hw_breakpoint(addr, type) >= 0) {
1399 return -EEXIST;
1402 nb_hw_breakpoint++;
1403 break;
1405 case GDB_WATCHPOINT_WRITE:
1406 case GDB_WATCHPOINT_READ:
1407 case GDB_WATCHPOINT_ACCESS:
1408 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1409 return -ENOBUFS;
1412 if (find_hw_breakpoint(addr, type) >= 0) {
1413 return -EEXIST;
1416 nb_hw_watchpoint++;
1417 break;
1419 default:
1420 return -ENOSYS;
1423 return 0;
1426 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1427 target_ulong len, int type)
1429 int n;
1431 n = find_hw_breakpoint(addr, type);
1432 if (n < 0) {
1433 return -ENOENT;
1436 switch (type) {
1437 case GDB_BREAKPOINT_HW:
1438 nb_hw_breakpoint--;
1439 break;
1441 case GDB_WATCHPOINT_WRITE:
1442 case GDB_WATCHPOINT_READ:
1443 case GDB_WATCHPOINT_ACCESS:
1444 nb_hw_watchpoint--;
1445 break;
1447 default:
1448 return -ENOSYS;
1450 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1452 return 0;
1455 void kvm_arch_remove_all_hw_breakpoints(void)
1457 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1460 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1462 int n;
1464 /* Software Breakpoint updates */
1465 if (kvm_sw_breakpoints_active(cs)) {
1466 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1469 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1470 <= ARRAY_SIZE(hw_debug_points));
1471 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1473 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1474 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1475 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1476 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1477 switch (hw_debug_points[n].type) {
1478 case GDB_BREAKPOINT_HW:
1479 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1480 break;
1481 case GDB_WATCHPOINT_WRITE:
1482 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1483 break;
1484 case GDB_WATCHPOINT_READ:
1485 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1486 break;
1487 case GDB_WATCHPOINT_ACCESS:
1488 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1489 KVMPPC_DEBUG_WATCH_READ;
1490 break;
1491 default:
1492 cpu_abort(cs, "Unsupported breakpoint type\n");
1494 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1499 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1501 CPUState *cs = CPU(cpu);
1502 CPUPPCState *env = &cpu->env;
1503 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1504 int handle = 0;
1505 int n;
1506 int flag = 0;
1508 if (cs->singlestep_enabled) {
1509 handle = 1;
1510 } else if (arch_info->status) {
1511 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1512 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1513 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1514 if (n >= 0) {
1515 handle = 1;
1517 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1518 KVMPPC_DEBUG_WATCH_WRITE)) {
1519 n = find_hw_watchpoint(arch_info->address, &flag);
1520 if (n >= 0) {
1521 handle = 1;
1522 cs->watchpoint_hit = &hw_watchpoint;
1523 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1524 hw_watchpoint.flags = flag;
1528 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1529 handle = 1;
1530 } else {
1531 /* QEMU is not able to handle debug exception, so inject
1532 * program exception to guest;
1533 * Yes program exception NOT debug exception !!
1534 * When QEMU is using debug resources then debug exception must
1535 * be always set. To achieve this we set MSR_DE and also set
1536 * MSRP_DEP so guest cannot change MSR_DE.
1537 * When emulating debug resource for guest we want guest
1538 * to control MSR_DE (enable/disable debug interrupt on need).
1539 * Supporting both configurations are NOT possible.
1540 * So the result is that we cannot share debug resources
1541 * between QEMU and Guest on BOOKE architecture.
1542 * In the current design QEMU gets the priority over guest,
1543 * this means that if QEMU is using debug resources then guest
1544 * cannot use them;
1545 * For software breakpoint QEMU uses a privileged instruction;
1546 * So there cannot be any reason that we are here for guest
1547 * set debug exception, only possibility is guest executed a
1548 * privileged / illegal instruction and that's why we are
1549 * injecting a program interrupt.
1552 cpu_synchronize_state(cs);
1553 /* env->nip is PC, so increment this by 4 to use
1554 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1556 env->nip += 4;
1557 cs->exception_index = POWERPC_EXCP_PROGRAM;
1558 env->error_code = POWERPC_EXCP_INVAL;
1559 ppc_cpu_do_interrupt(cs);
1562 return handle;
1565 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1567 PowerPCCPU *cpu = POWERPC_CPU(cs);
1568 CPUPPCState *env = &cpu->env;
1569 int ret;
1571 switch (run->exit_reason) {
1572 case KVM_EXIT_DCR:
1573 if (run->dcr.is_write) {
1574 DPRINTF("handle dcr write\n");
1575 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1576 } else {
1577 DPRINTF("handle dcr read\n");
1578 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1580 break;
1581 case KVM_EXIT_HLT:
1582 DPRINTF("handle halt\n");
1583 ret = kvmppc_handle_halt(cpu);
1584 break;
1585 #if defined(TARGET_PPC64)
1586 case KVM_EXIT_PAPR_HCALL:
1587 DPRINTF("handle PAPR hypercall\n");
1588 run->papr_hcall.ret = spapr_hypercall(cpu,
1589 run->papr_hcall.nr,
1590 run->papr_hcall.args);
1591 ret = 0;
1592 break;
1593 #endif
1594 case KVM_EXIT_EPR:
1595 DPRINTF("handle epr\n");
1596 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1597 ret = 0;
1598 break;
1599 case KVM_EXIT_WATCHDOG:
1600 DPRINTF("handle watchdog expiry\n");
1601 watchdog_perform_action();
1602 ret = 0;
1603 break;
1605 case KVM_EXIT_DEBUG:
1606 DPRINTF("handle debug exception\n");
1607 if (kvm_handle_debug(cpu, run)) {
1608 ret = EXCP_DEBUG;
1609 break;
1611 /* re-enter, this exception was guest-internal */
1612 ret = 0;
1613 break;
1615 default:
1616 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1617 ret = -1;
1618 break;
1621 return ret;
1624 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1626 CPUState *cs = CPU(cpu);
1627 uint32_t bits = tsr_bits;
1628 struct kvm_one_reg reg = {
1629 .id = KVM_REG_PPC_OR_TSR,
1630 .addr = (uintptr_t) &bits,
1633 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1636 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1639 CPUState *cs = CPU(cpu);
1640 uint32_t bits = tsr_bits;
1641 struct kvm_one_reg reg = {
1642 .id = KVM_REG_PPC_CLEAR_TSR,
1643 .addr = (uintptr_t) &bits,
1646 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1649 int kvmppc_set_tcr(PowerPCCPU *cpu)
1651 CPUState *cs = CPU(cpu);
1652 CPUPPCState *env = &cpu->env;
1653 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1655 struct kvm_one_reg reg = {
1656 .id = KVM_REG_PPC_TCR,
1657 .addr = (uintptr_t) &tcr,
1660 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1663 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1665 CPUState *cs = CPU(cpu);
1666 int ret;
1668 if (!kvm_enabled()) {
1669 return -1;
1672 if (!cap_ppc_watchdog) {
1673 printf("warning: KVM does not support watchdog");
1674 return -1;
1677 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1678 if (ret < 0) {
1679 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1680 __func__, strerror(-ret));
1681 return ret;
1684 return ret;
/*
 * Find the first line of /proc/cpuinfo that starts with `field`.
 *
 * The whole matching line is copied into value (at most len bytes, via
 * pstrcpy).  Returns 0 when a line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int status = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (!fp) {
        return -1;
    }

    for (;;) {
        if (!fgets(line, sizeof(line), fp)) {
            break;
        }
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            status = 0;
            break;
        }
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(fp);

    return status;
}
/*
 * Return the host timebase frequency as reported by /proc/cpuinfo.
 *
 * The value after the ':' on the "timebase" line is parsed as a decimal
 * number.  If the line is missing, has no ':', holds no digits, or the
 * number does not fit in 32 bits, get_ticks_per_sec() is returned instead.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoul() rather than atoi(): atoi() cannot report a parse failure
     * (it yields 0) and its behaviour on overflow is undefined. */
    errno = 0;
    freq = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || freq > UINT32_MAX) {
        return retval;
    }

    return freq;
}
/*
 * Read /proc/device-tree/system-id into a newly allocated buffer stored in
 * *value.  Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
/*
 * Read /proc/device-tree/model into a newly allocated buffer stored in
 * *value.  Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1746 /* Try to find a device tree node for a CPU with clock-frequency property */
1747 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1749 struct dirent *dirp;
1750 DIR *dp;
1752 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1753 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1754 return -1;
1757 buf[0] = '\0';
1758 while ((dirp = readdir(dp)) != NULL) {
1759 FILE *f;
1760 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1761 dirp->d_name);
1762 f = fopen(buf, "r");
1763 if (f) {
1764 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1765 fclose(f);
1766 break;
1768 buf[0] = '\0';
1770 closedir(dp);
1771 if (buf[0] == '\0') {
1772 printf("Unknown host!\n");
1773 return -1;
1776 return 0;
1779 /* Read a CPU node property from the host device tree that's a single
1780 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1781 * (can't find or open the property, or doesn't understand the
1782 * format) */
1783 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1785 char buf[PATH_MAX], *tmp;
1786 union {
1787 uint32_t v32;
1788 uint64_t v64;
1789 } u;
1790 FILE *f;
1791 int len;
1793 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1794 return -1;
1797 tmp = g_strdup_printf("%s/%s", buf, propname);
1799 f = fopen(tmp, "rb");
1800 g_free(tmp);
1801 if (!f) {
1802 return -1;
1805 len = fread(&u, 1, sizeof(u), f);
1806 fclose(f);
1807 switch (len) {
1808 case 4:
1809 /* property is a 32-bit quantity */
1810 return be32_to_cpu(u.v32);
1811 case 8:
1812 return be64_to_cpu(u.v64);
1815 return 0;
/* Return the host CPU's "clock-frequency" device tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
/* Return the host CPU's "ibm,vmx" device tree property, truncated to
 * 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
/* Return the host CPU's "ibm,dfp" device tree property, truncated to
 * 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
1833 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1835 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1836 CPUState *cs = CPU(cpu);
1838 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1839 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1840 return 0;
1843 return 1;
1846 int kvmppc_get_hasidle(CPUPPCState *env)
1848 struct kvm_ppc_pvinfo pvinfo;
1850 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1851 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1852 return 1;
1855 return 0;
1858 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1860 uint32_t *hc = (uint32_t*)buf;
1861 struct kvm_ppc_pvinfo pvinfo;
1863 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1864 memcpy(buf, pvinfo.hcall, buf_len);
1865 return 0;
1869 * Fallback to always fail hypercalls regardless of endianness:
1871 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1872 * li r3, -1
1873 * b .+8 (becomes nop in wrong endian)
1874 * bswap32(li r3, -1)
1877 hc[0] = cpu_to_be32(0x08000048);
1878 hc[1] = cpu_to_be32(0x3860ffff);
1879 hc[2] = cpu_to_be32(0x48000008);
1880 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1882 return 0;
1885 void kvmppc_set_papr(PowerPCCPU *cpu)
1887 CPUState *cs = CPU(cpu);
1888 int ret;
1890 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1891 if (ret) {
1892 cpu_abort(cs, "This KVM version does not support PAPR\n");
1895 /* Update the capability flag so we sync the right information
1896 * with kvm */
1897 cap_papr = 1;
1900 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1902 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1905 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1907 CPUState *cs = CPU(cpu);
1908 int ret;
1910 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1911 if (ret && mpic_proxy) {
1912 cpu_abort(cs, "This KVM version does not support EPR\n");
1916 int kvmppc_smt_threads(void)
1918 return cap_ppc_smt ? cap_ppc_smt : 1;
1921 #ifdef TARGET_PPC64
1922 off_t kvmppc_alloc_rma(void **rma)
1924 off_t size;
1925 int fd;
1926 struct kvm_allocate_rma ret;
1928 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1929 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1930 * not necessary on this hardware
1931 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1933 * FIXME: We should allow the user to force contiguous RMA
1934 * allocation in the cap_ppc_rma==1 case.
1936 if (cap_ppc_rma < 2) {
1937 return 0;
1940 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1941 if (fd < 0) {
1942 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1943 strerror(errno));
1944 return -1;
1947 size = MIN(ret.rma_size, 256ul << 20);
1949 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1950 if (*rma == MAP_FAILED) {
1951 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1952 return -1;
1955 return size;
1958 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1960 struct kvm_ppc_smmu_info info;
1961 long rampagesize, best_page_shift;
1962 int i;
1964 if (cap_ppc_rma >= 2) {
1965 return current_size;
1968 /* Find the largest hardware supported page size that's less than
1969 * or equal to the (logical) backing page size of guest RAM */
1970 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1971 rampagesize = getrampagesize();
1972 best_page_shift = 0;
1974 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1975 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1977 if (!sps->page_shift) {
1978 continue;
1981 if ((sps->page_shift > best_page_shift)
1982 && ((1UL << sps->page_shift) <= rampagesize)) {
1983 best_page_shift = sps->page_shift;
1987 return MIN(current_size,
1988 1ULL << (best_page_shift + hash_shift - 7));
1990 #endif
1992 bool kvmppc_spapr_use_multitce(void)
1994 return cap_spapr_multitce;
1997 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
1998 bool vfio_accel)
2000 struct kvm_create_spapr_tce args = {
2001 .liobn = liobn,
2002 .window_size = window_size,
2004 long len;
2005 int fd;
2006 void *table;
2008 /* Must set fd to -1 so we don't try to munmap when called for
2009 * destroying the table, which the upper layers -will- do
2011 *pfd = -1;
2012 if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
2013 return NULL;
2016 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2017 if (fd < 0) {
2018 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2019 liobn);
2020 return NULL;
2023 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2024 /* FIXME: round this up to page size */
2026 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2027 if (table == MAP_FAILED) {
2028 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2029 liobn);
2030 close(fd);
2031 return NULL;
2034 *pfd = fd;
2035 return table;
/*
 * Unmap a TCE table of nb_table entries and close its fd.
 *
 * Returns -1 when fd is negative, otherwise 0.  A failing munmap() or
 * close() is only reported on stderr; close() is not attempted when
 * munmap() fails.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long map_len;
    int failed;

    if (fd < 0) {
        return -1;
    }

    map_len = nb_table * sizeof(uint64_t);

    failed = munmap(table, map_len) < 0;
    if (!failed) {
        failed = close(fd) < 0;
    }

    if (failed) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2057 int kvmppc_reset_htab(int shift_hint)
2059 uint32_t shift = shift_hint;
2061 if (!kvm_enabled()) {
2062 /* Full emulation, tell caller to allocate htab itself */
2063 return 0;
2065 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2066 int ret;
2067 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2068 if (ret == -ENOTTY) {
2069 /* At least some versions of PR KVM advertise the
2070 * capability, but don't implement the ioctl(). Oops.
2071 * Return 0 so that we allocate the htab in qemu, as is
2072 * correct for PR. */
2073 return 0;
2074 } else if (ret < 0) {
2075 return ret;
2077 return shift;
2080 /* We have a kernel that predates the htab reset calls. For PR
2081 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2082 * this era, it has allocated a 16MB fixed size hash table
2083 * already. Kernels of this era have the GET_PVINFO capability
2084 * only on PR, so we use this hack to determine the right
2085 * answer */
2086 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2087 /* PR - tell caller to allocate htab */
2088 return 0;
2089 } else {
2090 /* HV - assume 16MB kernel allocated htab */
2091 return 24;
2095 static inline uint32_t mfpvr(void)
2097 uint32_t pvr;
2099 asm ("mfpvr %0"
2100 : "=r"(pvr));
2101 return pvr;
/* Set (on == true) or clear (on == false) the bits of `flags` in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2113 static void kvmppc_host_cpu_initfn(Object *obj)
2115 assert(kvm_enabled());
2118 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2120 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2121 uint32_t vmx = kvmppc_get_vmx();
2122 uint32_t dfp = kvmppc_get_dfp();
2123 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2124 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2126 /* Now fix up the class with information we can query from the host */
2127 pcc->pvr = mfpvr();
2129 if (vmx != -1) {
2130 /* Only override when we know what the host supports */
2131 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2132 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2134 if (dfp != -1) {
2135 /* Only override when we know what the host supports */
2136 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2139 if (dcache_size != -1) {
2140 pcc->l1_dcache_size = dcache_size;
2143 if (icache_size != -1) {
2144 pcc->l1_icache_size = icache_size;
2148 bool kvmppc_has_cap_epr(void)
2150 return cap_epr;
2153 bool kvmppc_has_cap_htab_fd(void)
2155 return cap_htab_fd;
2158 bool kvmppc_has_cap_fixup_hcalls(void)
2160 return cap_fixup_hcalls;
2163 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2165 ObjectClass *oc = OBJECT_CLASS(pcc);
2167 while (oc && !object_class_is_abstract(oc)) {
2168 oc = object_class_get_parent(oc);
2170 assert(oc);
2172 return POWERPC_CPU_CLASS(oc);
2175 static int kvm_ppc_register_host_cpu_type(void)
2177 TypeInfo type_info = {
2178 .name = TYPE_HOST_POWERPC_CPU,
2179 .instance_init = kvmppc_host_cpu_initfn,
2180 .class_init = kvmppc_host_cpu_class_init,
2182 uint32_t host_pvr = mfpvr();
2183 PowerPCCPUClass *pvr_pcc;
2184 DeviceClass *dc;
2186 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2187 if (pvr_pcc == NULL) {
2188 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2190 if (pvr_pcc == NULL) {
2191 return -1;
2193 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2194 type_register(&type_info);
2196 /* Register generic family CPU class for a family */
2197 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2198 dc = DEVICE_CLASS(pvr_pcc);
2199 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2200 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2201 type_register(&type_info);
2203 return 0;
2206 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2208 struct kvm_rtas_token_args args = {
2209 .token = token,
2212 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2213 return -ENOENT;
2216 strncpy(args.name, function, sizeof(args.name));
2218 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2221 int kvmppc_get_htab_fd(bool write)
2223 struct kvm_get_htab_fd s = {
2224 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2225 .start_index = 0,
2228 if (!cap_htab_fd) {
2229 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2230 return -1;
2233 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2236 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2238 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2239 uint8_t buf[bufsize];
2240 ssize_t rc;
2242 do {
2243 rc = read(fd, buf, bufsize);
2244 if (rc < 0) {
2245 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2246 strerror(errno));
2247 return rc;
2248 } else if (rc) {
2249 /* Kernel already retuns data in BE format for the file */
2250 qemu_put_buffer(f, buf, rc);
2252 } while ((rc != 0)
2253 && ((max_ns < 0)
2254 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2256 return (rc == 0) ? 1 : 0;
2259 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2260 uint16_t n_valid, uint16_t n_invalid)
2262 struct kvm_get_htab_header *buf;
2263 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2264 ssize_t rc;
2266 buf = alloca(chunksize);
2267 /* This is KVM on ppc, so this is all big-endian */
2268 buf->index = index;
2269 buf->n_valid = n_valid;
2270 buf->n_invalid = n_invalid;
2272 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2274 rc = write(fd, buf, chunksize);
2275 if (rc < 0) {
2276 fprintf(stderr, "Error writing KVM hash table: %s\n",
2277 strerror(errno));
2278 return rc;
2280 if (rc != chunksize) {
2281 /* We should never get a short write on a single chunk */
2282 fprintf(stderr, "Short write, restoring KVM hash table\n");
2283 return -1;
2285 return 0;
2288 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2290 return true;
2293 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2295 return 1;
/* SIGBUS hook: does nothing with its arguments and returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
2303 void kvm_arch_init_irq_routing(KVMState *s)
2307 struct kvm_get_htab_buf {
2308 struct kvm_get_htab_header header;
2310 * We require one extra byte for read
2312 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2315 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2317 int htab_fd;
2318 struct kvm_get_htab_fd ghf;
2319 struct kvm_get_htab_buf *hpte_buf;
2321 ghf.flags = 0;
2322 ghf.start_index = pte_index;
2323 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2324 if (htab_fd < 0) {
2325 goto error_out;
2328 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2330 * Read the hpte group
2332 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2333 goto out_close;
2336 close(htab_fd);
2337 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2339 out_close:
2340 g_free(hpte_buf);
2341 close(htab_fd);
2342 error_out:
2343 return 0;
2346 void kvmppc_hash64_free_pteg(uint64_t token)
2348 struct kvm_get_htab_buf *htab_buf;
2350 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2351 hpte);
2352 g_free(htab_buf);
2353 return;
2356 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2357 target_ulong pte0, target_ulong pte1)
2359 int htab_fd;
2360 struct kvm_get_htab_fd ghf;
2361 struct kvm_get_htab_buf hpte_buf;
2363 ghf.flags = 0;
2364 ghf.start_index = 0; /* Ignored */
2365 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2366 if (htab_fd < 0) {
2367 goto error_out;
2370 hpte_buf.header.n_valid = 1;
2371 hpte_buf.header.n_invalid = 0;
2372 hpte_buf.header.index = pte_index;
2373 hpte_buf.hpte[0] = pte0;
2374 hpte_buf.hpte[1] = pte1;
2376 * Write the hpte entry.
2377 * CAUTION: write() has the warn_unused_result attribute. Hence we
2378 * need to check the return value, even though we do nothing.
2380 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2381 goto out_close;
2384 out_close:
2385 close(htab_fd);
2386 return;
2388 error_out:
2389 return;