PPC: Enable to use PAPR with PR style KVM
[qemu/kevin.git] / target-ppc / kvm.c
blobf65b6e1f4abc31695e2187b72f116cf87b57228f
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu-timer.h"
26 #include "sysemu.h"
27 #include "kvm.h"
28 #include "kvm_ppc.h"
29 #include "cpu.h"
30 #include "device_tree.h"
32 #include "hw/sysbus.h"
33 #include "hw/spapr.h"
34 #include "hw/spapr_vio.h"
/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
/* Debug trace to stderr; compiles to an empty statement unless DEBUG_KVM. */
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
46 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
48 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
49 KVM_CAP_LAST_INFO
52 static int cap_interrupt_unset = false;
53 static int cap_interrupt_level = false;
54 static int cap_segstate;
55 static int cap_booke_sregs;
57 /* XXX We have a race condition where we actually have a level triggered
58 * interrupt, but the infrastructure can't expose that yet, so the guest
59 * takes but ignores it, goes to sleep and never gets notified that there's
60 * still an interrupt pending.
62 * As a quick workaround, let's just wake up again 20 ms after we injected
63 * an interrupt. That way we can assure that we're always reinjecting
64 * interrupts in case the guest swallowed them.
66 static QEMUTimer *idle_timer;
/* Timer callback: kick the vcpu so pending interrupts are re-evaluated. */
static void kvm_kick_env(void *env)
{
    qemu_cpu_kick(env);
}
73 int kvm_arch_init(KVMState *s)
75 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
76 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
77 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
78 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
80 if (!cap_interrupt_level) {
81 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
82 "VM to stall at times!\n");
85 return 0;
88 static int kvm_arch_sync_sregs(CPUState *cenv)
90 struct kvm_sregs sregs;
91 int ret;
93 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
94 /* What we're really trying to say is "if we're on BookE, we use
95 the native PVR for now". This is the only sane way to check
96 it though, so we potentially confuse users that they can run
97 BookE guests on BookS. Let's hope nobody dares enough :) */
98 return 0;
99 } else {
100 if (!cap_segstate) {
101 fprintf(stderr, "kvm error: missing PVR setting capability\n");
102 return -ENOSYS;
106 ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
107 if (ret) {
108 return ret;
111 sregs.pvr = cenv->spr[SPR_PVR];
112 return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
115 int kvm_arch_init_vcpu(CPUState *cenv)
117 int ret;
119 ret = kvm_arch_sync_sregs(cenv);
120 if (ret) {
121 return ret;
124 idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_env, cenv);
126 return ret;
129 void kvm_arch_reset_vcpu(CPUState *env)
133 int kvm_arch_put_registers(CPUState *env, int level)
135 struct kvm_regs regs;
136 int ret;
137 int i;
139 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
140 if (ret < 0)
141 return ret;
143 regs.ctr = env->ctr;
144 regs.lr = env->lr;
145 regs.xer = env->xer;
146 regs.msr = env->msr;
147 regs.pc = env->nip;
149 regs.srr0 = env->spr[SPR_SRR0];
150 regs.srr1 = env->spr[SPR_SRR1];
152 regs.sprg0 = env->spr[SPR_SPRG0];
153 regs.sprg1 = env->spr[SPR_SPRG1];
154 regs.sprg2 = env->spr[SPR_SPRG2];
155 regs.sprg3 = env->spr[SPR_SPRG3];
156 regs.sprg4 = env->spr[SPR_SPRG4];
157 regs.sprg5 = env->spr[SPR_SPRG5];
158 regs.sprg6 = env->spr[SPR_SPRG6];
159 regs.sprg7 = env->spr[SPR_SPRG7];
161 regs.pid = env->spr[SPR_BOOKE_PID];
163 for (i = 0;i < 32; i++)
164 regs.gpr[i] = env->gpr[i];
166 ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
167 if (ret < 0)
168 return ret;
170 return ret;
173 int kvm_arch_get_registers(CPUState *env)
175 struct kvm_regs regs;
176 struct kvm_sregs sregs;
177 uint32_t cr;
178 int i, ret;
180 ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
181 if (ret < 0)
182 return ret;
184 cr = regs.cr;
185 for (i = 7; i >= 0; i--) {
186 env->crf[i] = cr & 15;
187 cr >>= 4;
190 env->ctr = regs.ctr;
191 env->lr = regs.lr;
192 env->xer = regs.xer;
193 env->msr = regs.msr;
194 env->nip = regs.pc;
196 env->spr[SPR_SRR0] = regs.srr0;
197 env->spr[SPR_SRR1] = regs.srr1;
199 env->spr[SPR_SPRG0] = regs.sprg0;
200 env->spr[SPR_SPRG1] = regs.sprg1;
201 env->spr[SPR_SPRG2] = regs.sprg2;
202 env->spr[SPR_SPRG3] = regs.sprg3;
203 env->spr[SPR_SPRG4] = regs.sprg4;
204 env->spr[SPR_SPRG5] = regs.sprg5;
205 env->spr[SPR_SPRG6] = regs.sprg6;
206 env->spr[SPR_SPRG7] = regs.sprg7;
208 env->spr[SPR_BOOKE_PID] = regs.pid;
210 for (i = 0;i < 32; i++)
211 env->gpr[i] = regs.gpr[i];
213 if (cap_booke_sregs) {
214 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
215 if (ret < 0) {
216 return ret;
219 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
220 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
221 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
222 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
223 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
224 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
225 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
226 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
227 env->spr[SPR_DECR] = sregs.u.e.dec;
228 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
229 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
230 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
233 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
234 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
235 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
236 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
237 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
238 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
241 if (sregs.u.e.features & KVM_SREGS_E_64) {
242 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
245 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
246 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
249 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
250 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
251 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
252 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
253 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
254 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
255 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
256 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
257 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
258 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
259 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
260 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
261 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
262 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
263 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
264 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
265 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
267 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
268 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
269 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
270 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
273 if (sregs.u.e.features & KVM_SREGS_E_PM) {
274 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
277 if (sregs.u.e.features & KVM_SREGS_E_PC) {
278 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
279 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
283 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
284 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
285 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
286 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
287 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
288 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
289 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
290 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
291 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
292 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
293 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
296 if (sregs.u.e.features & KVM_SREGS_EXP) {
297 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
300 if (sregs.u.e.features & KVM_SREGS_E_PD) {
301 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
302 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
305 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
306 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
307 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
308 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
310 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
311 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
312 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
317 if (cap_segstate) {
318 ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
319 if (ret < 0) {
320 return ret;
323 ppc_store_sdr1(env, sregs.u.s.sdr1);
325 /* Sync SLB */
326 #ifdef TARGET_PPC64
327 for (i = 0; i < 64; i++) {
328 ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
329 sregs.u.s.ppc64.slb[i].slbv);
331 #endif
333 /* Sync SRs */
334 for (i = 0; i < 16; i++) {
335 env->sr[i] = sregs.u.s.ppc32.sr[i];
338 /* Sync BATs */
339 for (i = 0; i < 8; i++) {
340 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
341 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
342 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
343 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
347 return 0;
350 int kvmppc_set_interrupt(CPUState *env, int irq, int level)
352 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
354 if (irq != PPC_INTERRUPT_EXT) {
355 return 0;
358 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
359 return 0;
362 kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
364 return 0;
/* Pick the core input-pin index for the external interrupt line of the
 * modelled CPU family. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
375 void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
377 int r;
378 unsigned irq;
380 /* PowerPC Qemu tracks the various core input pins (interrupt, critical
381 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
382 if (!cap_interrupt_level &&
383 run->ready_for_interrupt_injection &&
384 (env->interrupt_request & CPU_INTERRUPT_HARD) &&
385 (env->irq_input_state & (1<<PPC_INPUT_INT)))
387 /* For now KVM disregards the 'irq' argument. However, in the
388 * future KVM could cache it in-kernel to avoid a heavyweight exit
389 * when reading the UIC.
391 irq = KVM_INTERRUPT_SET;
393 dprintf("injected interrupt %d\n", irq);
394 r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
395 if (r < 0)
396 printf("cpu %d fail inject %x\n", env->cpu_index, irq);
398 /* Always wake up soon in case the interrupt was level based */
399 qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
400 (get_ticks_per_sec() / 50));
403 /* We don't know if there are more interrupts pending after this. However,
404 * the guest will return to userspace in the course of handling this one
405 * anyways, so we will get a chance to deliver the rest. */
408 void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
412 int kvm_arch_process_async_events(CPUState *env)
414 return 0;
417 static int kvmppc_handle_halt(CPUState *env)
419 if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
420 env->halted = 1;
421 env->exception_index = EXCP_HLT;
424 return 0;
427 /* map dcr access to existing qemu dcr emulation */
428 static int kvmppc_handle_dcr_read(CPUState *env, uint32_t dcrn, uint32_t *data)
430 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
431 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
433 return 0;
436 static int kvmppc_handle_dcr_write(CPUState *env, uint32_t dcrn, uint32_t data)
438 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
439 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
441 return 0;
444 int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run)
446 int ret;
448 switch (run->exit_reason) {
449 case KVM_EXIT_DCR:
450 if (run->dcr.is_write) {
451 dprintf("handle dcr write\n");
452 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
453 } else {
454 dprintf("handle dcr read\n");
455 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
457 break;
458 case KVM_EXIT_HLT:
459 dprintf("handle halt\n");
460 ret = kvmppc_handle_halt(env);
461 break;
462 #ifdef CONFIG_PSERIES
463 case KVM_EXIT_PAPR_HCALL:
464 dprintf("handle PAPR hypercall\n");
465 run->papr_hcall.ret = spapr_hypercall(env, run->papr_hcall.nr,
466 run->papr_hcall.args);
467 ret = 1;
468 break;
469 #endif
470 default:
471 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
472 ret = -1;
473 break;
476 return ret;
/* Copy the first /proc/cpuinfo line starting with 'field' into 'value'.
 *
 * Returns 0 on success, -1 if the file cannot be opened or no line with
 * that prefix exists.  On success 'value' holds the (possibly truncated)
 * line and is always NUL-terminated.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            strncpy(value, line, len);
            /* strncpy() does not terminate the copy when the source is
             * at least 'len' bytes long; guarantee callers a valid C
             * string. */
            if (len > 0) {
                value[len - 1] = '\0';
            }
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}
/* Probe the host timebase frequency from /proc/cpuinfo's "timebase" line.
 * Falls back to get_ticks_per_sec() if the line is missing or malformed. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    /* Skip the colon; atoi tolerates the leading whitespace. */
    ns++;

    retval = atoi(ns);
    return retval;
}
527 /* Try to find a device tree node for a CPU with clock-frequency property */
528 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
530 struct dirent *dirp;
531 DIR *dp;
533 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
534 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
535 return -1;
538 buf[0] = '\0';
539 while ((dirp = readdir(dp)) != NULL) {
540 FILE *f;
541 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
542 dirp->d_name);
543 f = fopen(buf, "r");
544 if (f) {
545 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
546 fclose(f);
547 break;
549 buf[0] = '\0';
551 closedir(dp);
552 if (buf[0] == '\0') {
553 printf("Unknown host!\n");
554 return -1;
557 return 0;
/* Read the host CPU's clock-frequency property from the device tree.
 *
 * Returns the frequency in Hz, 0 if no suitable node exists (or the
 * property has an unexpected size), or (uint64_t)-1 if the property file
 * cannot be opened — NOTE(review): callers should probably treat that
 * the same as 0; kept for compatibility.
 */
uint64_t kvmppc_get_clockfreq(void)
{
    char buf[512];
    uint32_t tb[2];
    uint64_t freq;
    FILE *f;
    int len;

    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return 0;
    }

    /* strncat() writes up to n bytes PLUS a terminating NUL, so the bound
     * must leave room for it; the previous bound could overflow buf by
     * one byte for long node paths. */
    strncat(buf, "/clock-frequency", sizeof(buf) - strlen(buf) - 1);

    f = fopen(buf, "rb");
    if (!f) {
        return -1;
    }

    len = fread(tb, sizeof(tb[0]), 2, f);
    fclose(f);
    switch (len) {
    case 1:
        /* freq is only a single cell */
        return tb[0];
    case 2:
        /* Two cells: reassemble via memcpy instead of casting tb to
         * uint64_t*, which would violate strict aliasing.  The bytes are
         * identical, so behavior is unchanged. */
        memcpy(&freq, tb, sizeof(freq));
        return freq;
    }

    return 0;
}
591 int kvmppc_get_hypercall(CPUState *env, uint8_t *buf, int buf_len)
593 uint32_t *hc = (uint32_t*)buf;
595 struct kvm_ppc_pvinfo pvinfo;
597 if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
598 !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
599 memcpy(buf, pvinfo.hcall, buf_len);
601 return 0;
605 * Fallback to always fail hypercalls:
607 * li r3, -1
608 * nop
609 * nop
610 * nop
613 hc[0] = 0x3860ffff;
614 hc[1] = 0x60000000;
615 hc[2] = 0x60000000;
616 hc[3] = 0x60000000;
618 return 0;
621 void kvmppc_set_papr(CPUState *env)
623 struct kvm_enable_cap cap;
624 int ret;
626 memset(&cap, 0, sizeof(cap));
627 cap.cap = KVM_CAP_PPC_PAPR;
628 ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
630 if (ret) {
631 goto fail;
635 * XXX We set HIOR here. It really should be a qdev property of
636 * the CPU node, but we don't have CPUs converted to qdev yet.
638 * Once we have qdev CPUs, move HIOR to a qdev property and
639 * remove this chunk.
641 /* XXX Set HIOR using new ioctl */
643 return;
645 fail:
646 cpu_abort(env, "This KVM version does not support PAPR\n");
649 bool kvm_arch_stop_on_emulation_error(CPUState *env)
651 return true;
654 int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
656 return 1;
/* SIGBUS outside vcpu context is likewise unhandled on PPC. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}