target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/timer.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "sysemu/cpus.h"
  32 #include "sysemu/device_tree.h"
  33 #include "hw/sysbus.h"
  34 #include "hw/spapr.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/spapr.h"
  38 #include "hw/spapr_vio.h"
  39
/* Uncomment to make dprintf() write its debug output to stderr. */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
/* Debug build: dprintf() forwards its arguments to fprintf(stderr, ...). */
#define dprintf(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Non-debug build: dprintf() expands to an empty statement. */
#define dprintf(fmt, ...) \
    do { } while (0)
#endif

/* Directory prefix for CPU nodes in the host's /proc device tree. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

/* No capability is mandatory here: the list holds only its terminator. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

/*
 * Cached results of kvm_check_extension(), filled in by kvm_arch_init().
 * Each holds the value reported for the capability named in the comment.
 */
static int cap_interrupt_unset = false;   /* KVM_CAP_PPC_UNSET_IRQ */
static int cap_interrupt_level = false;   /* KVM_CAP_PPC_IRQ_LEVEL */
static int cap_segstate;                  /* KVM_CAP_PPC_SEGSTATE */
static int cap_booke_sregs;               /* KVM_CAP_PPC_BOOKE_SREGS */
static int cap_ppc_smt;                   /* KVM_CAP_PPC_SMT */
static int cap_ppc_rma;                   /* KVM_CAP_PPC_RMA */
static int cap_spapr_tce;                 /* KVM_CAP_SPAPR_TCE */
static int cap_hior;                      /* KVM_CAP_PPC_HIOR */
static int cap_one_reg;                   /* KVM_CAP_ONE_REG */

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 *
 *     The timer is created in kvm_arch_init_vcpu() with kvm_kick_cpu() as
 *     its callback.
 */
static QEMUTimer *idle_timer;
  76
  77 static void kvm_kick_cpu(void *opaque)
  78 {
  79     PowerPCCPU *cpu = opaque;
  80
  81     qemu_cpu_kick(CPU(cpu));
  82 }
  83
/* Forward declaration; called from kvm_arch_init() below. */
static int kvm_ppc_register_host_cpu_type(void);
  85
  86 int kvm_arch_init(KVMState *s)
  87 {
  88     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
  89     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
  90     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
  91     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
  92     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
  93     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
  94     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
  95     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
  96     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
  97
  98     if (!cap_interrupt_level) {
  99         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 100                         "VM to stall at times!\n");
 101     }
 102
 103     kvm_ppc_register_host_cpu_type();
 104
 105     return 0;
 106 }
 107
 108 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 109 {
 110     CPUPPCState *cenv = &cpu->env;
 111     CPUState *cs = CPU(cpu);
 112     struct kvm_sregs sregs;
 113     int ret;
 114
 115     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 116         /* What we're really trying to say is "if we're on BookE, we use
 117            the native PVR for now". This is the only sane way to check
 118            it though, so we potentially confuse users that they can run
 119            BookE guests on BookS. Let's hope nobody dares enough :) */
 120         return 0;
 121     } else {
 122         if (!cap_segstate) {
 123             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 124             return -ENOSYS;
 125         }
 126     }
 127
 128     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 129     if (ret) {
 130         return ret;
 131     }
 132
 133     sregs.pvr = cenv->spr[SPR_PVR];
 134     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 135 }
 136
 137 /* Set up a shared TLB array with KVM */
 138 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 139 {
 140     CPUPPCState *env = &cpu->env;
 141     CPUState *cs = CPU(cpu);
 142     struct kvm_book3e_206_tlb_params params = {};
 143     struct kvm_config_tlb cfg = {};
 144     struct kvm_enable_cap encap = {};
 145     unsigned int entries = 0;
 146     int ret, i;
 147
 148     if (!kvm_enabled() ||
 149         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 150         return 0;
 151     }
 152
 153     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 154
 155     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 156         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 157         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 158         entries += params.tlb_sizes[i];
 159     }
 160
 161     assert(entries == env->nb_tlb);
 162     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 163
 164     env->tlb_dirty = true;
 165
 166     cfg.array = (uintptr_t)env->tlb.tlbm;
 167     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 168     cfg.params = (uintptr_t)&params;
 169     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 170
 171     encap.cap = KVM_CAP_SW_TLB;
 172     encap.args[0] = (uintptr_t)&cfg;
 173
 174     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
 175     if (ret < 0) {
 176         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 177                 __func__, strerror(-ret));
 178         return ret;
 179     }
 180
 181     env->kvm_sw_tlb = true;
 182     return 0;
 183 }
 184
 185
 186 #if defined(TARGET_PPC64)
 187 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 188                                        struct kvm_ppc_smmu_info *info)
 189 {
 190     CPUPPCState *env = &cpu->env;
 191     CPUState *cs = CPU(cpu);
 192
 193     memset(info, 0, sizeof(*info));
 194
 195     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 196      * need to "guess" what the supported page sizes are.
 197      *
 198      * For that to work we make a few assumptions:
 199      *
 200      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 201      *   KVM which only supports 4K and 16M pages, but supports them
 202      *   regardless of the backing store characteritics. We also don't
 203      *   support 1T segments.
 204      *
 205      *   This is safe as if HV KVM ever supports that capability or PR
 206      *   KVM grows supports for more page/segment sizes, those versions
 207      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 208      *   will not hit this fallback
 209      *
 210      * - Else we are running HV KVM. This means we only support page
 211      *   sizes that fit in the backing store. Additionally we only
 212      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 213      *   P7 encodings for the SLB and hash table. Here too, we assume
 214      *   support for any newer processor will mean a kernel that
 215      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 216      *   this fallback.
 217      */
 218     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 219         /* No flags */
 220         info->flags = 0;
 221         info->slb_size = 64;
 222
 223         /* Standard 4k base page size segment */
 224         info->sps[0].page_shift = 12;
 225         info->sps[0].slb_enc = 0;
 226         info->sps[0].enc[0].page_shift = 12;
 227         info->sps[0].enc[0].pte_enc = 0;
 228
 229         /* Standard 16M large page size segment */
 230         info->sps[1].page_shift = 24;
 231         info->sps[1].slb_enc = SLB_VSID_L;
 232         info->sps[1].enc[0].page_shift = 24;
 233         info->sps[1].enc[0].pte_enc = 0;
 234     } else {
 235         int i = 0;
 236
 237         /* HV KVM has backing store size restrictions */
 238         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 239
 240         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 241             info->flags |= KVM_PPC_1T_SEGMENTS;
 242         }
 243
 244         if (env->mmu_model == POWERPC_MMU_2_06) {
 245             info->slb_size = 32;
 246         } else {
 247             info->slb_size = 64;
 248         }
 249
 250         /* Standard 4k base page size segment */
 251         info->sps[i].page_shift = 12;
 252         info->sps[i].slb_enc = 0;
 253         info->sps[i].enc[0].page_shift = 12;
 254         info->sps[i].enc[0].pte_enc = 0;
 255         i++;
 256
 257         /* 64K on MMU 2.06 */
 258         if (env->mmu_model == POWERPC_MMU_2_06) {
 259             info->sps[i].page_shift = 16;
 260             info->sps[i].slb_enc = 0x110;
 261             info->sps[i].enc[0].page_shift = 16;
 262             info->sps[i].enc[0].pte_enc = 1;
 263             i++;
 264         }
 265
 266         /* Standard 16M large page size segment */
 267         info->sps[i].page_shift = 24;
 268         info->sps[i].slb_enc = SLB_VSID_L;
 269         info->sps[i].enc[0].page_shift = 24;
 270         info->sps[i].enc[0].pte_enc = 0;
 271     }
 272 }
 273
 274 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 275 {
 276     CPUState *cs = CPU(cpu);
 277     int ret;
 278
 279     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 280         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 281         if (ret == 0) {
 282             return;
 283         }
 284     }
 285
 286     kvm_get_fallback_smmu_info(cpu, info);
 287 }
 288
 289 static long getrampagesize(void)
 290 {
 291     struct statfs fs;
 292     int ret;
 293
 294     if (!mem_path) {
 295         /* guest RAM is backed by normal anonymous pages */
 296         return getpagesize();
 297     }
 298
 299     do {
 300         ret = statfs(mem_path, &fs);
 301     } while (ret != 0 && errno == EINTR);
 302
 303     if (ret != 0) {
 304         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 305                 strerror(errno));
 306         exit(1);
 307     }
 308
 309 #define HUGETLBFS_MAGIC       0x958458f6
 310
 311     if (fs.f_type != HUGETLBFS_MAGIC) {
 312         /* Explicit mempath, but it's ordinary pages */
 313         return getpagesize();
 314     }
 315
 316     /* It's hugepage, return the huge page size */
 317     return fs.f_bsize;
 318 }
 319
 320 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 321 {
 322     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 323         return true;
 324     }
 325
 326     return (1ul << shift) <= rampgsize;
 327 }
 328
 329 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 330 {
 331     static struct kvm_ppc_smmu_info smmu_info;
 332     static bool has_smmu_info;
 333     CPUPPCState *env = &cpu->env;
 334     long rampagesize;
 335     int iq, ik, jq, jk;
 336
 337     /* We only handle page sizes for 64-bit server guests for now */
 338     if (!(env->mmu_model & POWERPC_MMU_64)) {
 339         return;
 340     }
 341
 342     /* Collect MMU info from kernel if not already */
 343     if (!has_smmu_info) {
 344         kvm_get_smmu_info(cpu, &smmu_info);
 345         has_smmu_info = true;
 346     }
 347
 348     rampagesize = getrampagesize();
 349
 350     /* Convert to QEMU form */
 351     memset(&env->sps, 0, sizeof(env->sps));
 352
 353     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 354         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 355         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 356
 357         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 358                                  ksps->page_shift)) {
 359             continue;
 360         }
 361         qsps->page_shift = ksps->page_shift;
 362         qsps->slb_enc = ksps->slb_enc;
 363         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 364             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 365                                      ksps->enc[jk].page_shift)) {
 366                 continue;
 367             }
 368             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 369             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 370             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 371                 break;
 372             }
 373         }
 374         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 375             break;
 376         }
 377     }
 378     env->slb_nr = smmu_info.slb_size;
 379     if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
 380         env->mmu_model |= POWERPC_MMU_1TSEG;
 381     } else {
 382         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 383     }
 384 }
 385 #else /* defined (TARGET_PPC64) */
 386
 387 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 388 {
 389 }
 390
 391 #endif /* !defined (TARGET_PPC64) */
 392
 393 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 394 {
 395     return cpu->cpu_index;
 396 }
 397
 398 int kvm_arch_init_vcpu(CPUState *cs)
 399 {
 400     PowerPCCPU *cpu = POWERPC_CPU(cs);
 401     CPUPPCState *cenv = &cpu->env;
 402     int ret;
 403
 404     /* Gather server mmu info from KVM and update the CPU state */
 405     kvm_fixup_page_sizes(cpu);
 406
 407     /* Synchronize sregs with kvm */
 408     ret = kvm_arch_sync_sregs(cpu);
 409     if (ret) {
 410         return ret;
 411     }
 412
 413     idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
 414
 415     /* Some targets support access to KVM's guest TLB. */
 416     switch (cenv->mmu_model) {
 417     case POWERPC_MMU_BOOKE206:
 418         ret = kvm_booke206_tlb_init(cpu);
 419         break;
 420     default:
 421         break;
 422     }
 423
 424     return ret;
 425 }
 426
 427 void kvm_arch_reset_vcpu(CPUState *cpu)
 428 {
 429 }
 430
 431 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 432 {
 433     CPUPPCState *env = &cpu->env;
 434     CPUState *cs = CPU(cpu);
 435     struct kvm_dirty_tlb dirty_tlb;
 436     unsigned char *bitmap;
 437     int ret;
 438
 439     if (!env->kvm_sw_tlb) {
 440         return;
 441     }
 442
 443     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 444     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 445
 446     dirty_tlb.bitmap = (uintptr_t)bitmap;
 447     dirty_tlb.num_dirty = env->nb_tlb;
 448
 449     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 450     if (ret) {
 451         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 452                 __func__, strerror(-ret));
 453     }
 454
 455     g_free(bitmap);
 456 }
 457
 458 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 459 {
 460     PowerPCCPU *cpu = POWERPC_CPU(cs);
 461     CPUPPCState *env = &cpu->env;
 462     union {
 463         uint32_t u32;
 464         uint64_t u64;
 465     } val;
 466     struct kvm_one_reg reg = {
 467         .id = id,
 468         .addr = (uintptr_t) &val,
 469     };
 470     int ret;
 471
 472     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 473     if (ret != 0) {
 474         fprintf(stderr, "Warning: Unable to retrieve SPR %d from KVM: %s\n",
 475                 spr, strerror(errno));
 476     } else {
 477         switch (id & KVM_REG_SIZE_MASK) {
 478         case KVM_REG_SIZE_U32:
 479             env->spr[spr] = val.u32;
 480             break;
 481
 482         case KVM_REG_SIZE_U64:
 483             env->spr[spr] = val.u64;
 484             break;
 485
 486         default:
 487             /* Don't handle this size yet */
 488             abort();
 489         }
 490     }
 491 }
 492
 493 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 494 {
 495     PowerPCCPU *cpu = POWERPC_CPU(cs);
 496     CPUPPCState *env = &cpu->env;
 497     union {
 498         uint32_t u32;
 499         uint64_t u64;
 500     } val;
 501     struct kvm_one_reg reg = {
 502         .id = id,
 503         .addr = (uintptr_t) &val,
 504     };
 505     int ret;
 506
 507     switch (id & KVM_REG_SIZE_MASK) {
 508     case KVM_REG_SIZE_U32:
 509         val.u32 = env->spr[spr];
 510         break;
 511
 512     case KVM_REG_SIZE_U64:
 513         val.u64 = env->spr[spr];
 514         break;
 515
 516     default:
 517         /* Don't handle this size yet */
 518         abort();
 519     }
 520
 521     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 522     if (ret != 0) {
 523         fprintf(stderr, "Warning: Unable to set SPR %d to KVM: %s\n",
 524                 spr, strerror(errno));
 525     }
 526 }
 527
 528 static int kvm_put_fp(CPUState *cs)
 529 {
 530     PowerPCCPU *cpu = POWERPC_CPU(cs);
 531     CPUPPCState *env = &cpu->env;
 532     struct kvm_one_reg reg;
 533     int i;
 534     int ret;
 535
 536     if (env->insns_flags & PPC_FLOAT) {
 537         uint64_t fpscr = env->fpscr;
 538         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 539
 540         reg.id = KVM_REG_PPC_FPSCR;
 541         reg.addr = (uintptr_t)&fpscr;
 542         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 543         if (ret < 0) {
 544             dprintf("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 545             return ret;
 546         }
 547
 548         for (i = 0; i < 32; i++) {
 549             uint64_t vsr[2];
 550
 551             vsr[0] = float64_val(env->fpr[i]);
 552             vsr[1] = env->vsr[i];
 553             reg.addr = (uintptr_t) &vsr;
 554             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 555
 556             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 557             if (ret < 0) {
 558                 dprintf("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 559                         i, strerror(errno));
 560                 return ret;
 561             }
 562         }
 563     }
 564
 565     if (env->insns_flags & PPC_ALTIVEC) {
 566         reg.id = KVM_REG_PPC_VSCR;
 567         reg.addr = (uintptr_t)&env->vscr;
 568         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 569         if (ret < 0) {
 570             dprintf("Unable to set VSCR to KVM: %s\n", strerror(errno));
 571             return ret;
 572         }
 573
 574         for (i = 0; i < 32; i++) {
 575             reg.id = KVM_REG_PPC_VR(i);
 576             reg.addr = (uintptr_t)&env->avr[i];
 577             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 578             if (ret < 0) {
 579                 dprintf("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 580                 return ret;
 581             }
 582         }
 583     }
 584
 585     return 0;
 586 }
 587
 588 static int kvm_get_fp(CPUState *cs)
 589 {
 590     PowerPCCPU *cpu = POWERPC_CPU(cs);
 591     CPUPPCState *env = &cpu->env;
 592     struct kvm_one_reg reg;
 593     int i;
 594     int ret;
 595
 596     if (env->insns_flags & PPC_FLOAT) {
 597         uint64_t fpscr;
 598         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 599
 600         reg.id = KVM_REG_PPC_FPSCR;
 601         reg.addr = (uintptr_t)&fpscr;
 602         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 603         if (ret < 0) {
 604             dprintf("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 605             return ret;
 606         } else {
 607             env->fpscr = fpscr;
 608         }
 609
 610         for (i = 0; i < 32; i++) {
 611             uint64_t vsr[2];
 612
 613             reg.addr = (uintptr_t) &vsr;
 614             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 615
 616             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 617             if (ret < 0) {
 618                 dprintf("Unable to get %s%d from KVM: %s\n",
 619                         vsx ? "VSR" : "FPR", i, strerror(errno));
 620                 return ret;
 621             } else {
 622                 env->fpr[i] = vsr[0];
 623                 if (vsx) {
 624                     env->vsr[i] = vsr[1];
 625                 }
 626             }
 627         }
 628     }
 629
 630     if (env->insns_flags & PPC_ALTIVEC) {
 631         reg.id = KVM_REG_PPC_VSCR;
 632         reg.addr = (uintptr_t)&env->vscr;
 633         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 634         if (ret < 0) {
 635             dprintf("Unable to get VSCR from KVM: %s\n", strerror(errno));
 636             return ret;
 637         }
 638
 639         for (i = 0; i < 32; i++) {
 640             reg.id = KVM_REG_PPC_VR(i);
 641             reg.addr = (uintptr_t)&env->avr[i];
 642             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 643             if (ret < 0) {
 644                 dprintf("Unable to get VR%d from KVM: %s\n",
 645                         i, strerror(errno));
 646                 return ret;
 647             }
 648         }
 649     }
 650
 651     return 0;
 652 }
 653
 654 int kvm_arch_put_registers(CPUState *cs, int level)
 655 {
 656     PowerPCCPU *cpu = POWERPC_CPU(cs);
 657     CPUPPCState *env = &cpu->env;
 658     struct kvm_regs regs;
 659     int ret;
 660     int i;
 661
 662     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 663     if (ret < 0) {
 664         return ret;
 665     }
 666
 667     regs.ctr = env->ctr;
 668     regs.lr  = env->lr;
 669     regs.xer = cpu_read_xer(env);
 670     regs.msr = env->msr;
 671     regs.pc = env->nip;
 672
 673     regs.srr0 = env->spr[SPR_SRR0];
 674     regs.srr1 = env->spr[SPR_SRR1];
 675
 676     regs.sprg0 = env->spr[SPR_SPRG0];
 677     regs.sprg1 = env->spr[SPR_SPRG1];
 678     regs.sprg2 = env->spr[SPR_SPRG2];
 679     regs.sprg3 = env->spr[SPR_SPRG3];
 680     regs.sprg4 = env->spr[SPR_SPRG4];
 681     regs.sprg5 = env->spr[SPR_SPRG5];
 682     regs.sprg6 = env->spr[SPR_SPRG6];
 683     regs.sprg7 = env->spr[SPR_SPRG7];
 684
 685     regs.pid = env->spr[SPR_BOOKE_PID];
 686
 687     for (i = 0;i < 32; i++)
 688         regs.gpr[i] = env->gpr[i];
 689
 690     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 691     if (ret < 0)
 692         return ret;
 693
 694     kvm_put_fp(cs);
 695
 696     if (env->tlb_dirty) {
 697         kvm_sw_tlb_put(cpu);
 698         env->tlb_dirty = false;
 699     }
 700
 701     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 702         struct kvm_sregs sregs;
 703
 704         sregs.pvr = env->spr[SPR_PVR];
 705
 706         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 707
 708         /* Sync SLB */
 709 #ifdef TARGET_PPC64
 710         for (i = 0; i < 64; i++) {
 711             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 712             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 713         }
 714 #endif
 715
 716         /* Sync SRs */
 717         for (i = 0; i < 16; i++) {
 718             sregs.u.s.ppc32.sr[i] = env->sr[i];
 719         }
 720
 721         /* Sync BATs */
 722         for (i = 0; i < 8; i++) {
 723             /* Beware. We have to swap upper and lower bits here */
 724             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 725                 | env->DBAT[1][i];
 726             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 727                 | env->IBAT[1][i];
 728         }
 729
 730         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 731         if (ret) {
 732             return ret;
 733         }
 734     }
 735
 736     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 737         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 738     }
 739
 740     if (cap_one_reg) {
 741         int i;
 742
 743         /* We deliberately ignore errors here, for kernels which have
 744          * the ONE_REG calls, but don't support the specific
 745          * registers, there's a reasonable chance things will still
 746          * work, at least until we try to migrate. */
 747         for (i = 0; i < 1024; i++) {
 748             uint64_t id = env->spr_cb[i].one_reg_id;
 749
 750             if (id != 0) {
 751                 kvm_put_one_spr(cs, id, i);
 752             }
 753         }
 754     }
 755
 756     return ret;
 757 }
 758
 759 int kvm_arch_get_registers(CPUState *cs)
 760 {
 761     PowerPCCPU *cpu = POWERPC_CPU(cs);
 762     CPUPPCState *env = &cpu->env;
 763     struct kvm_regs regs;
 764     struct kvm_sregs sregs;
 765     uint32_t cr;
 766     int i, ret;
 767
 768     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 769     if (ret < 0)
 770         return ret;
 771
 772     cr = regs.cr;
 773     for (i = 7; i >= 0; i--) {
 774         env->crf[i] = cr & 15;
 775         cr >>= 4;
 776     }
 777
 778     env->ctr = regs.ctr;
 779     env->lr = regs.lr;
 780     cpu_write_xer(env, regs.xer);
 781     env->msr = regs.msr;
 782     env->nip = regs.pc;
 783
 784     env->spr[SPR_SRR0] = regs.srr0;
 785     env->spr[SPR_SRR1] = regs.srr1;
 786
 787     env->spr[SPR_SPRG0] = regs.sprg0;
 788     env->spr[SPR_SPRG1] = regs.sprg1;
 789     env->spr[SPR_SPRG2] = regs.sprg2;
 790     env->spr[SPR_SPRG3] = regs.sprg3;
 791     env->spr[SPR_SPRG4] = regs.sprg4;
 792     env->spr[SPR_SPRG5] = regs.sprg5;
 793     env->spr[SPR_SPRG6] = regs.sprg6;
 794     env->spr[SPR_SPRG7] = regs.sprg7;
 795
 796     env->spr[SPR_BOOKE_PID] = regs.pid;
 797
 798     for (i = 0;i < 32; i++)
 799         env->gpr[i] = regs.gpr[i];
 800
 801     kvm_get_fp(cs);
 802
 803     if (cap_booke_sregs) {
 804         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 805         if (ret < 0) {
 806             return ret;
 807         }
 808
 809         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 810             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 811             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 812             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 813             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
 814             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
 815             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
 816             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
 817             env->spr[SPR_DECR] = sregs.u.e.dec;
 818             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
 819             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
 820             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
 821         }
 822
 823         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
 824             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
 825             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
 826             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
 827             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
 828             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
 829         }
 830
 831         if (sregs.u.e.features & KVM_SREGS_E_64) {
 832             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
 833         }
 834
 835         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
 836             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
 837         }
 838
 839         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
 840             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
 841             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
 842             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
 843             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
 844             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
 845             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
 846             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
 847             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
 848             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
 849             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
 850             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
 851             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
 852             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
 853             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
 854             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
 855             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
 856
 857             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
 858                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
 859                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
 860                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
 861             }
 862
 863             if (sregs.u.e.features & KVM_SREGS_E_PM) {
 864                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
 865             }
 866
 867             if (sregs.u.e.features & KVM_SREGS_E_PC) {
 868                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
 869                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
 870             }
 871         }
 872
 873         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
 874             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
 875             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
 876             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
 877             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
 878             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
 879             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
 880             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
 881             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
 882             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
 883             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
 884         }
 885
 886         if (sregs.u.e.features & KVM_SREGS_EXP) {
 887             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
 888         }
 889
 890         if (sregs.u.e.features & KVM_SREGS_E_PD) {
 891             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
 892             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
 893         }
 894
 895         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
 896             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
 897             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
 898             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
 899
 900             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
 901                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
 902                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
 903             }
 904         }
 905     }
 906
 907     if (cap_segstate) {
 908         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 909         if (ret < 0) {
 910             return ret;
 911         }
 912
 913         ppc_store_sdr1(env, sregs.u.s.sdr1);
 914
 915         /* Sync SLB */
 916 #ifdef TARGET_PPC64
 917         for (i = 0; i < 64; i++) {
 918             ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
 919                                sregs.u.s.ppc64.slb[i].slbv);
 920         }
 921 #endif
 922
 923         /* Sync SRs */
 924         for (i = 0; i < 16; i++) {
 925             env->sr[i] = sregs.u.s.ppc32.sr[i];
 926         }
 927
 928         /* Sync BATs */
 929         for (i = 0; i < 8; i++) {
 930             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
 931             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
 932             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
 933             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
 934         }
 935     }
 936
 937     if (cap_hior) {
 938         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 939     }
 940
 941     if (cap_one_reg) {
 942         int i;
 943
 944         /* We deliberately ignore errors here, for kernels which have
 945          * the ONE_REG calls, but don't support the specific
 946          * registers, there's a reasonable chance things will still
 947          * work, at least until we try to migrate. */
 948         for (i = 0; i < 1024; i++) {
 949             uint64_t id = env->spr_cb[i].one_reg_id;
 950
 951             if (id != 0) {
 952                 kvm_get_one_spr(cs, id, i);
 953             }
 954         }
 955     }
 956
 957     return 0;
 958 }
 959
 960 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
 961 {
 962     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
 963
 964     if (irq != PPC_INTERRUPT_EXT) {
 965         return 0;
 966     }
 967
 968     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
 969         return 0;
 970     }
 971
 972     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
 973
 974     return 0;
 975 }
 976
 977 #if defined(TARGET_PPCEMB)
 978 #define PPC_INPUT_INT PPC40x_INPUT_INT
 979 #elif defined(TARGET_PPC64)
 980 #define PPC_INPUT_INT PPC970_INPUT_INT
 981 #else
 982 #define PPC_INPUT_INT PPC6xx_INPUT_INT
 983 #endif
 984
 985 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 986 {
 987     PowerPCCPU *cpu = POWERPC_CPU(cs);
 988     CPUPPCState *env = &cpu->env;
 989     int r;
 990     unsigned irq;
 991
 992     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
 993      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
 994     if (!cap_interrupt_level &&
 995         run->ready_for_interrupt_injection &&
 996         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
 997         (env->irq_input_state & (1<<PPC_INPUT_INT)))
 998     {
 999         /* For now KVM disregards the 'irq' argument. However, in the
1000          * future KVM could cache it in-kernel to avoid a heavyweight exit
1001          * when reading the UIC.
1002          */
1003         irq = KVM_INTERRUPT_SET;
1004
1005         dprintf("injected interrupt %d\n", irq);
1006         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1007         if (r < 0) {
1008             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1009         }
1010
1011         /* Always wake up soon in case the interrupt was level based */
1012         qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
1013                        (get_ticks_per_sec() / 50));
1014     }
1015
1016     /* We don't know if there are more interrupts pending after this. However,
1017      * the guest will return to userspace in the course of handling this one
1018      * anyways, so we will get a chance to deliver the rest. */
1019 }
1020
1021 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1022 {
1023 }
1024
1025 int kvm_arch_process_async_events(CPUState *cs)
1026 {
1027     return cs->halted;
1028 }
1029
1030 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1031 {
1032     CPUState *cs = CPU(cpu);
1033     CPUPPCState *env = &cpu->env;
1034
1035     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1036         cs->halted = 1;
1037         env->exception_index = EXCP_HLT;
1038     }
1039
1040     return 0;
1041 }
1042
1043 /* map dcr access to existing qemu dcr emulation */
1044 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1045 {
1046     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1047         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1048
1049     return 0;
1050 }
1051
1052 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1053 {
1054     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1055         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1056
1057     return 0;
1058 }
1059
1060 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1061 {
1062     PowerPCCPU *cpu = POWERPC_CPU(cs);
1063     CPUPPCState *env = &cpu->env;
1064     int ret;
1065
1066     switch (run->exit_reason) {
1067     case KVM_EXIT_DCR:
1068         if (run->dcr.is_write) {
1069             dprintf("handle dcr write\n");
1070             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1071         } else {
1072             dprintf("handle dcr read\n");
1073             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1074         }
1075         break;
1076     case KVM_EXIT_HLT:
1077         dprintf("handle halt\n");
1078         ret = kvmppc_handle_halt(cpu);
1079         break;
1080 #ifdef CONFIG_PSERIES
1081     case KVM_EXIT_PAPR_HCALL:
1082         dprintf("handle PAPR hypercall\n");
1083         run->papr_hcall.ret = spapr_hypercall(cpu,
1084                                               run->papr_hcall.nr,
1085                                               run->papr_hcall.args);
1086         ret = 0;
1087         break;
1088 #endif
1089     case KVM_EXIT_EPR:
1090         dprintf("handle epr\n");
1091         run->epr.epr = ldl_phys(env->mpic_iack);
1092         ret = 0;
1093         break;
1094     default:
1095         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1096         ret = -1;
1097         break;
1098     }
1099
1100     return ret;
1101 }
1102
/* Find the first line of /proc/cpuinfo that starts with 'field'.
 *
 * On a match the whole line is copied into 'value' (capacity 'len')
 * with pstrcpy().
 *
 * Returns 0 when a matching line was found, -1 when the file cannot be
 * opened or no line matches. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop scanning once an empty string comes back. */
        if (!*line) {
            break;
        }
    }

    fclose(f);

    return ret;
}
1130
/* Determine the host timebase frequency from /proc/cpuinfo.
 *
 * The number following the ':' on the "timebase" line is parsed as an
 * unsigned decimal.  The default, get_ticks_per_sec(), is returned
 * when the line is missing, has no ':' separator, carries no digits,
 * or holds a value that does not fit in 32 bits. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoul() rather than atoi(): the result is unsigned 32-bit, and
     * atoi() can neither report a parse failure nor cope with values
     * above INT_MAX. */
    errno = 0;
    freq = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || freq > UINT32_MAX) {
        return retval;
    }

    return freq;
}
1150
1151 /* Try to find a device tree node for a CPU with clock-frequency property */
1152 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1153 {
1154     struct dirent *dirp;
1155     DIR *dp;
1156
1157     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1158         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1159         return -1;
1160     }
1161
1162     buf[0] = '\0';
1163     while ((dirp = readdir(dp)) != NULL) {
1164         FILE *f;
1165         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1166                  dirp->d_name);
1167         f = fopen(buf, "r");
1168         if (f) {
1169             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1170             fclose(f);
1171             break;
1172         }
1173         buf[0] = '\0';
1174     }
1175     closedir(dp);
1176     if (buf[0] == '\0') {
1177         printf("Unknown host!\n");
1178         return -1;
1179     }
1180
1181     return 0;
1182 }
1183
1184 /* Read a CPU node property from the host device tree that's a single
1185  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1186  * (can't find or open the property, or doesn't understand the
1187  * format) */
1188 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1189 {
1190     char buf[PATH_MAX];
1191     union {
1192         uint32_t v32;
1193         uint64_t v64;
1194     } u;
1195     FILE *f;
1196     int len;
1197
1198     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1199         return -1;
1200     }
1201
1202     strncat(buf, "/", sizeof(buf) - strlen(buf));
1203     strncat(buf, propname, sizeof(buf) - strlen(buf));
1204
1205     f = fopen(buf, "rb");
1206     if (!f) {
1207         return -1;
1208     }
1209
1210     len = fread(&u, 1, sizeof(u), f);
1211     fclose(f);
1212     switch (len) {
1213     case 4:
1214         /* property is a 32-bit quantity */
1215         return be32_to_cpu(u.v32);
1216     case 8:
1217         return be64_to_cpu(u.v64);
1218     }
1219
1220     return 0;
1221 }
1222
/* Return the host CPU node's "clock-frequency" device tree property,
 * exactly as produced by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1227
/* Return the host CPU node's "ibm,vmx" device tree property.  The
 * 64-bit result of kvmppc_read_int_cpu_dt() is truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
1232
/* Return the host CPU node's "ibm,dfp" device tree property.  The
 * 64-bit result of kvmppc_read_int_cpu_dt() is truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
1237
1238 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1239  {
1240      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1241      CPUState *cs = CPU(cpu);
1242
1243     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1244         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1245         return 0;
1246     }
1247
1248     return 1;
1249 }
1250
1251 int kvmppc_get_hasidle(CPUPPCState *env)
1252 {
1253     struct kvm_ppc_pvinfo pvinfo;
1254
1255     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1256         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1257         return 1;
1258     }
1259
1260     return 0;
1261 }
1262
1263 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1264 {
1265     uint32_t *hc = (uint32_t*)buf;
1266     struct kvm_ppc_pvinfo pvinfo;
1267
1268     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1269         memcpy(buf, pvinfo.hcall, buf_len);
1270         return 0;
1271     }
1272
1273     /*
1274      * Fallback to always fail hypercalls:
1275      *
1276      *     li r3, -1
1277      *     nop
1278      *     nop
1279      *     nop
1280      */
1281
1282     hc[0] = 0x3860ffff;
1283     hc[1] = 0x60000000;
1284     hc[2] = 0x60000000;
1285     hc[3] = 0x60000000;
1286
1287     return 0;
1288 }
1289
1290 void kvmppc_set_papr(PowerPCCPU *cpu)
1291 {
1292     CPUPPCState *env = &cpu->env;
1293     CPUState *cs = CPU(cpu);
1294     struct kvm_enable_cap cap = {};
1295     int ret;
1296
1297     cap.cap = KVM_CAP_PPC_PAPR;
1298     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1299
1300     if (ret) {
1301         cpu_abort(env, "This KVM version does not support PAPR\n");
1302     }
1303 }
1304
1305 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1306 {
1307     CPUPPCState *env = &cpu->env;
1308     CPUState *cs = CPU(cpu);
1309     struct kvm_enable_cap cap = {};
1310     int ret;
1311
1312     cap.cap = KVM_CAP_PPC_EPR;
1313     cap.args[0] = mpic_proxy;
1314     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1315
1316     if (ret && mpic_proxy) {
1317         cpu_abort(env, "This KVM version does not support EPR\n");
1318     }
1319 }
1320
1321 int kvmppc_smt_threads(void)
1322 {
1323     return cap_ppc_smt ? cap_ppc_smt : 1;
1324 }
1325
1326 #ifdef TARGET_PPC64
1327 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1328 {
1329     void *rma;
1330     off_t size;
1331     int fd;
1332     struct kvm_allocate_rma ret;
1333     MemoryRegion *rma_region;
1334
1335     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1336      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1337      *                      not necessary on this hardware
1338      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1339      *
1340      * FIXME: We should allow the user to force contiguous RMA
1341      * allocation in the cap_ppc_rma==1 case.
1342      */
1343     if (cap_ppc_rma < 2) {
1344         return 0;
1345     }
1346
1347     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1348     if (fd < 0) {
1349         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1350                 strerror(errno));
1351         return -1;
1352     }
1353
1354     size = MIN(ret.rma_size, 256ul << 20);
1355
1356     rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1357     if (rma == MAP_FAILED) {
1358         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1359         return -1;
1360     };
1361
1362     rma_region = g_new(MemoryRegion, 1);
1363     memory_region_init_ram_ptr(rma_region, name, size, rma);
1364     vmstate_register_ram_global(rma_region);
1365     memory_region_add_subregion(sysmem, 0, rma_region);
1366
1367     return size;
1368 }
1369
1370 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1371 {
1372     if (cap_ppc_rma >= 2) {
1373         return current_size;
1374     }
1375     return MIN(current_size,
1376                getrampagesize() << (hash_shift - 7));
1377 }
1378 #endif
1379
1380 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1381 {
1382     struct kvm_create_spapr_tce args = {
1383         .liobn = liobn,
1384         .window_size = window_size,
1385     };
1386     long len;
1387     int fd;
1388     void *table;
1389
1390     /* Must set fd to -1 so we don't try to munmap when called for
1391      * destroying the table, which the upper layers -will- do
1392      */
1393     *pfd = -1;
1394     if (!cap_spapr_tce) {
1395         return NULL;
1396     }
1397
1398     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1399     if (fd < 0) {
1400         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1401                 liobn);
1402         return NULL;
1403     }
1404
1405     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1406     /* FIXME: round this up to page size */
1407
1408     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1409     if (table == MAP_FAILED) {
1410         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1411                 liobn);
1412         close(fd);
1413         return NULL;
1414     }
1415
1416     *pfd = fd;
1417     return table;
1418 }
1419
1420 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1421 {
1422     long len;
1423
1424     if (fd < 0) {
1425         return -1;
1426     }
1427
1428     len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1429     if ((munmap(table, len) < 0) ||
1430         (close(fd) < 0)) {
1431         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1432                 strerror(errno));
1433         /* Leak the table */
1434     }
1435
1436     return 0;
1437 }
1438
1439 int kvmppc_reset_htab(int shift_hint)
1440 {
1441     uint32_t shift = shift_hint;
1442
1443     if (!kvm_enabled()) {
1444         /* Full emulation, tell caller to allocate htab itself */
1445         return 0;
1446     }
1447     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1448         int ret;
1449         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1450         if (ret == -ENOTTY) {
1451             /* At least some versions of PR KVM advertise the
1452              * capability, but don't implement the ioctl().  Oops.
1453              * Return 0 so that we allocate the htab in qemu, as is
1454              * correct for PR. */
1455             return 0;
1456         } else if (ret < 0) {
1457             return ret;
1458         }
1459         return shift;
1460     }
1461
1462     /* We have a kernel that predates the htab reset calls.  For PR
1463      * KVM, we need to allocate the htab ourselves, for an HV KVM of
1464      * this era, it has allocated a 16MB fixed size hash table
1465      * already.  Kernels of this era have the GET_PVINFO capability
1466      * only on PR, so we use this hack to determine the right
1467      * answer */
1468     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1469         /* PR - tell caller to allocate htab */
1470         return 0;
1471     } else {
1472         /* HV - assume 16MB kernel allocated htab */
1473         return 24;
1474     }
1475 }
1476
1477 static inline uint32_t mfpvr(void)
1478 {
1479     uint32_t pvr;
1480
1481     asm ("mfpvr %0"
1482          : "=r"(pvr));
1483     return pvr;
1484 }
1485
/* Set (on == true) or clear (on == false) the bits given by 'flags'
 * in *word.  All other bits are left untouched. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1494
1495 static void kvmppc_host_cpu_initfn(Object *obj)
1496 {
1497     assert(kvm_enabled());
1498 }
1499
1500 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1501 {
1502     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1503     uint32_t vmx = kvmppc_get_vmx();
1504     uint32_t dfp = kvmppc_get_dfp();
1505
1506     /* Now fix up the class with information we can query from the host */
1507
1508     if (vmx != -1) {
1509         /* Only override when we know what the host supports */
1510         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1511         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1512     }
1513     if (dfp != -1) {
1514         /* Only override when we know what the host supports */
1515         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1516     }
1517 }
1518
1519 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1520 {
1521     CPUState *cs = CPU(cpu);
1522     int smt;
1523
1524     /* Adjust cpu index for SMT */
1525     smt = kvmppc_smt_threads();
1526     cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1527         + (cs->cpu_index % smp_threads);
1528
1529     return 0;
1530 }
1531
1532 static int kvm_ppc_register_host_cpu_type(void)
1533 {
1534     TypeInfo type_info = {
1535         .name = TYPE_HOST_POWERPC_CPU,
1536         .instance_init = kvmppc_host_cpu_initfn,
1537         .class_init = kvmppc_host_cpu_class_init,
1538     };
1539     uint32_t host_pvr = mfpvr();
1540     PowerPCCPUClass *pvr_pcc;
1541
1542     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1543     if (pvr_pcc == NULL) {
1544         return -1;
1545     }
1546     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1547     type_register(&type_info);
1548     return 0;
1549 }
1550
1551
1552 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1553 {
1554     return true;
1555 }
1556
1557 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1558 {
1559     return 1;
1560 }
1561
/* SIGBUS hook.  Unconditionally returns 1; neither argument is
 * inspected. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return (1);
}