target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu-timer.h"
  27 #include "sysemu.h"
  28 #include "kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "cpus.h"
  32 #include "device_tree.h"
  33 #include "hw/sysbus.h"
  34 #include "hw/spapr.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/spapr.h"
  38 #include "hw/spapr_vio.h"
  39
  40 //#define DEBUG_KVM
  41
  42 #ifdef DEBUG_KVM
  43 #define dprintf(fmt, ...) \
  44     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  45 #else
  46 #define dprintf(fmt, ...) \
  47     do { } while (0)
  48 #endif
  49
  50 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  51
  52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  53     KVM_CAP_LAST_INFO
  54 };
  55
  56 static int cap_interrupt_unset = false;
  57 static int cap_interrupt_level = false;
  58 static int cap_segstate;
  59 static int cap_booke_sregs;
  60 static int cap_ppc_smt;
  61 static int cap_ppc_rma;
  62 static int cap_spapr_tce;
  63 static int cap_hior;
  64
  65 /* XXX We have a race condition where we actually have a level triggered
  66  *     interrupt, but the infrastructure can't expose that yet, so the guest
  67  *     takes but ignores it, goes to sleep and never gets notified that there's
  68  *     still an interrupt pending.
  69  *
  70  *     As a quick workaround, let's just wake up again 20 ms after we injected
  71  *     an interrupt. That way we can assure that we're always reinjecting
  72  *     interrupts in case the guest swallowed them.
  73  */
  74 static QEMUTimer *idle_timer;
  75
  76 static void kvm_kick_cpu(void *opaque)
  77 {
  78     PowerPCCPU *cpu = opaque;
  79
  80     qemu_cpu_kick(CPU(cpu));
  81 }
  82
  83 int kvm_arch_init(KVMState *s)
  84 {
  85     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
  86     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
  87     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
  88     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
  89     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
  90     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
  91     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
  92     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
  93
  94     if (!cap_interrupt_level) {
  95         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
  96                         "VM to stall at times!\n");
  97     }
  98
  99     return 0;
 100 }
 101
 102 static int kvm_arch_sync_sregs(CPUPPCState *cenv)
 103 {
 104     struct kvm_sregs sregs;
 105     int ret;
 106
 107     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 108         /* What we're really trying to say is "if we're on BookE, we use
 109            the native PVR for now". This is the only sane way to check
 110            it though, so we potentially confuse users that they can run
 111            BookE guests on BookS. Let's hope nobody dares enough :) */
 112         return 0;
 113     } else {
 114         if (!cap_segstate) {
 115             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 116             return -ENOSYS;
 117         }
 118     }
 119
 120     ret = kvm_vcpu_ioctl(cenv, KVM_GET_SREGS, &sregs);
 121     if (ret) {
 122         return ret;
 123     }
 124
 125     sregs.pvr = cenv->spr[SPR_PVR];
 126     return kvm_vcpu_ioctl(cenv, KVM_SET_SREGS, &sregs);
 127 }
 128
 129 /* Set up a shared TLB array with KVM */
 130 static int kvm_booke206_tlb_init(CPUPPCState *env)
 131 {
 132     struct kvm_book3e_206_tlb_params params = {};
 133     struct kvm_config_tlb cfg = {};
 134     struct kvm_enable_cap encap = {};
 135     unsigned int entries = 0;
 136     int ret, i;
 137
 138     if (!kvm_enabled() ||
 139         !kvm_check_extension(env->kvm_state, KVM_CAP_SW_TLB)) {
 140         return 0;
 141     }
 142
 143     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 144
 145     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 146         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 147         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 148         entries += params.tlb_sizes[i];
 149     }
 150
 151     assert(entries == env->nb_tlb);
 152     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 153
 154     env->tlb_dirty = true;
 155
 156     cfg.array = (uintptr_t)env->tlb.tlbm;
 157     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 158     cfg.params = (uintptr_t)&params;
 159     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 160
 161     encap.cap = KVM_CAP_SW_TLB;
 162     encap.args[0] = (uintptr_t)&cfg;
 163
 164     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &encap);
 165     if (ret < 0) {
 166         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 167                 __func__, strerror(-ret));
 168         return ret;
 169     }
 170
 171     env->kvm_sw_tlb = true;
 172     return 0;
 173 }
 174
 175
 176 #if defined(TARGET_PPC64)
 177 static void kvm_get_fallback_smmu_info(CPUPPCState *env,
 178                                        struct kvm_ppc_smmu_info *info)
 179 {
 180     memset(info, 0, sizeof(*info));
 181
 182     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 183      * need to "guess" what the supported page sizes are.
 184      *
 185      * For that to work we make a few assumptions:
 186      *
 187      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 188      *   KVM which only supports 4K and 16M pages, but supports them
 189      *   regardless of the backing store characteritics. We also don't
 190      *   support 1T segments.
 191      *
 192      *   This is safe as if HV KVM ever supports that capability or PR
 193      *   KVM grows supports for more page/segment sizes, those versions
 194      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 195      *   will not hit this fallback
 196      *
 197      * - Else we are running HV KVM. This means we only support page
 198      *   sizes that fit in the backing store. Additionally we only
 199      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 200      *   P7 encodings for the SLB and hash table. Here too, we assume
 201      *   support for any newer processor will mean a kernel that
 202      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 203      *   this fallback.
 204      */
 205     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 206         /* No flags */
 207         info->flags = 0;
 208         info->slb_size = 64;
 209
 210         /* Standard 4k base page size segment */
 211         info->sps[0].page_shift = 12;
 212         info->sps[0].slb_enc = 0;
 213         info->sps[0].enc[0].page_shift = 12;
 214         info->sps[0].enc[0].pte_enc = 0;
 215
 216         /* Standard 16M large page size segment */
 217         info->sps[1].page_shift = 24;
 218         info->sps[1].slb_enc = SLB_VSID_L;
 219         info->sps[1].enc[0].page_shift = 24;
 220         info->sps[1].enc[0].pte_enc = 0;
 221     } else {
 222         int i = 0;
 223
 224         /* HV KVM has backing store size restrictions */
 225         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 226
 227         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 228             info->flags |= KVM_PPC_1T_SEGMENTS;
 229         }
 230
 231         if (env->mmu_model == POWERPC_MMU_2_06) {
 232             info->slb_size = 32;
 233         } else {
 234             info->slb_size = 64;
 235         }
 236
 237         /* Standard 4k base page size segment */
 238         info->sps[i].page_shift = 12;
 239         info->sps[i].slb_enc = 0;
 240         info->sps[i].enc[0].page_shift = 12;
 241         info->sps[i].enc[0].pte_enc = 0;
 242         i++;
 243
 244         /* 64K on MMU 2.06 */
 245         if (env->mmu_model == POWERPC_MMU_2_06) {
 246             info->sps[i].page_shift = 16;
 247             info->sps[i].slb_enc = 0x110;
 248             info->sps[i].enc[0].page_shift = 16;
 249             info->sps[i].enc[0].pte_enc = 1;
 250             i++;
 251         }
 252
 253         /* Standard 16M large page size segment */
 254         info->sps[i].page_shift = 24;
 255         info->sps[i].slb_enc = SLB_VSID_L;
 256         info->sps[i].enc[0].page_shift = 24;
 257         info->sps[i].enc[0].pte_enc = 0;
 258     }
 259 }
 260
 261 static void kvm_get_smmu_info(CPUPPCState *env, struct kvm_ppc_smmu_info *info)
 262 {
 263     int ret;
 264
 265     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 266         ret = kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 267         if (ret == 0) {
 268             return;
 269         }
 270     }
 271
 272     kvm_get_fallback_smmu_info(env, info);
 273 }
 274
 275 static long getrampagesize(void)
 276 {
 277     struct statfs fs;
 278     int ret;
 279
 280     if (!mem_path) {
 281         /* guest RAM is backed by normal anonymous pages */
 282         return getpagesize();
 283     }
 284
 285     do {
 286         ret = statfs(mem_path, &fs);
 287     } while (ret != 0 && errno == EINTR);
 288
 289     if (ret != 0) {
 290         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 291                 strerror(errno));
 292         exit(1);
 293     }
 294
 295 #define HUGETLBFS_MAGIC       0x958458f6
 296
 297     if (fs.f_type != HUGETLBFS_MAGIC) {
 298         /* Explicit mempath, but it's ordinary pages */
 299         return getpagesize();
 300     }
 301
 302     /* It's hugepage, return the huge page size */
 303     return fs.f_bsize;
 304 }
 305
 306 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 307 {
 308     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 309         return true;
 310     }
 311
 312     return (1ul << shift) <= rampgsize;
 313 }
 314
 315 static void kvm_fixup_page_sizes(CPUPPCState *env)
 316 {
 317     static struct kvm_ppc_smmu_info smmu_info;
 318     static bool has_smmu_info;
 319     long rampagesize;
 320     int iq, ik, jq, jk;
 321
 322     /* We only handle page sizes for 64-bit server guests for now */
 323     if (!(env->mmu_model & POWERPC_MMU_64)) {
 324         return;
 325     }
 326
 327     /* Collect MMU info from kernel if not already */
 328     if (!has_smmu_info) {
 329         kvm_get_smmu_info(env, &smmu_info);
 330         has_smmu_info = true;
 331     }
 332
 333     rampagesize = getrampagesize();
 334
 335     /* Convert to QEMU form */
 336     memset(&env->sps, 0, sizeof(env->sps));
 337
 338     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 339         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 340         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 341
 342         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 343                                  ksps->page_shift)) {
 344             continue;
 345         }
 346         qsps->page_shift = ksps->page_shift;
 347         qsps->slb_enc = ksps->slb_enc;
 348         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 349             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 350                                      ksps->enc[jk].page_shift)) {
 351                 continue;
 352             }
 353             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 354             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 355             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 356                 break;
 357             }
 358         }
 359         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 360             break;
 361         }
 362     }
 363     env->slb_nr = smmu_info.slb_size;
 364     if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
 365         env->mmu_model |= POWERPC_MMU_1TSEG;
 366     } else {
 367         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 368     }
 369 }
 370 #else /* defined (TARGET_PPC64) */
 371
 372 static inline void kvm_fixup_page_sizes(CPUPPCState *env)
 373 {
 374 }
 375
 376 #endif /* !defined (TARGET_PPC64) */
 377
 378 int kvm_arch_init_vcpu(CPUPPCState *cenv)
 379 {
 380     PowerPCCPU *cpu = ppc_env_get_cpu(cenv);
 381     int ret;
 382
 383     /* Gather server mmu info from KVM and update the CPU state */
 384     kvm_fixup_page_sizes(cenv);
 385
 386     /* Synchronize sregs with kvm */
 387     ret = kvm_arch_sync_sregs(cenv);
 388     if (ret) {
 389         return ret;
 390     }
 391
 392     idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
 393
 394     /* Some targets support access to KVM's guest TLB. */
 395     switch (cenv->mmu_model) {
 396     case POWERPC_MMU_BOOKE206:
 397         ret = kvm_booke206_tlb_init(cenv);
 398         break;
 399     default:
 400         break;
 401     }
 402
 403     return ret;
 404 }
 405
 406 void kvm_arch_reset_vcpu(CPUPPCState *env)
 407 {
 408 }
 409
 410 static void kvm_sw_tlb_put(CPUPPCState *env)
 411 {
 412     struct kvm_dirty_tlb dirty_tlb;
 413     unsigned char *bitmap;
 414     int ret;
 415
 416     if (!env->kvm_sw_tlb) {
 417         return;
 418     }
 419
 420     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 421     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 422
 423     dirty_tlb.bitmap = (uintptr_t)bitmap;
 424     dirty_tlb.num_dirty = env->nb_tlb;
 425
 426     ret = kvm_vcpu_ioctl(env, KVM_DIRTY_TLB, &dirty_tlb);
 427     if (ret) {
 428         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 429                 __func__, strerror(-ret));
 430     }
 431
 432     g_free(bitmap);
 433 }
 434
 435 int kvm_arch_put_registers(CPUPPCState *env, int level)
 436 {
 437     struct kvm_regs regs;
 438     int ret;
 439     int i;
 440
 441     ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 442     if (ret < 0)
 443         return ret;
 444
 445     regs.ctr = env->ctr;
 446     regs.lr  = env->lr;
 447     regs.xer = env->xer;
 448     regs.msr = env->msr;
 449     regs.pc = env->nip;
 450
 451     regs.srr0 = env->spr[SPR_SRR0];
 452     regs.srr1 = env->spr[SPR_SRR1];
 453
 454     regs.sprg0 = env->spr[SPR_SPRG0];
 455     regs.sprg1 = env->spr[SPR_SPRG1];
 456     regs.sprg2 = env->spr[SPR_SPRG2];
 457     regs.sprg3 = env->spr[SPR_SPRG3];
 458     regs.sprg4 = env->spr[SPR_SPRG4];
 459     regs.sprg5 = env->spr[SPR_SPRG5];
 460     regs.sprg6 = env->spr[SPR_SPRG6];
 461     regs.sprg7 = env->spr[SPR_SPRG7];
 462
 463     regs.pid = env->spr[SPR_BOOKE_PID];
 464
 465     for (i = 0;i < 32; i++)
 466         regs.gpr[i] = env->gpr[i];
 467
 468     ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
 469     if (ret < 0)
 470         return ret;
 471
 472     if (env->tlb_dirty) {
 473         kvm_sw_tlb_put(env);
 474         env->tlb_dirty = false;
 475     }
 476
 477     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 478         struct kvm_sregs sregs;
 479
 480         sregs.pvr = env->spr[SPR_PVR];
 481
 482         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 483
 484         /* Sync SLB */
 485 #ifdef TARGET_PPC64
 486         for (i = 0; i < 64; i++) {
 487             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 488             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 489         }
 490 #endif
 491
 492         /* Sync SRs */
 493         for (i = 0; i < 16; i++) {
 494             sregs.u.s.ppc32.sr[i] = env->sr[i];
 495         }
 496
 497         /* Sync BATs */
 498         for (i = 0; i < 8; i++) {
 499             /* Beware. We have to swap upper and lower bits here */
 500             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 501                 | env->DBAT[1][i];
 502             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 503                 | env->IBAT[1][i];
 504         }
 505
 506         ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
 507         if (ret) {
 508             return ret;
 509         }
 510     }
 511
 512     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 513         uint64_t hior = env->spr[SPR_HIOR];
 514         struct kvm_one_reg reg = {
 515             .id = KVM_REG_PPC_HIOR,
 516             .addr = (uintptr_t) &hior,
 517         };
 518
 519         ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
 520         if (ret) {
 521             return ret;
 522         }
 523     }
 524
 525     return ret;
 526 }
 527
 528 int kvm_arch_get_registers(CPUPPCState *env)
 529 {
 530     struct kvm_regs regs;
 531     struct kvm_sregs sregs;
 532     uint32_t cr;
 533     int i, ret;
 534
 535     ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
 536     if (ret < 0)
 537         return ret;
 538
 539     cr = regs.cr;
 540     for (i = 7; i >= 0; i--) {
 541         env->crf[i] = cr & 15;
 542         cr >>= 4;
 543     }
 544
 545     env->ctr = regs.ctr;
 546     env->lr = regs.lr;
 547     env->xer = regs.xer;
 548     env->msr = regs.msr;
 549     env->nip = regs.pc;
 550
 551     env->spr[SPR_SRR0] = regs.srr0;
 552     env->spr[SPR_SRR1] = regs.srr1;
 553
 554     env->spr[SPR_SPRG0] = regs.sprg0;
 555     env->spr[SPR_SPRG1] = regs.sprg1;
 556     env->spr[SPR_SPRG2] = regs.sprg2;
 557     env->spr[SPR_SPRG3] = regs.sprg3;
 558     env->spr[SPR_SPRG4] = regs.sprg4;
 559     env->spr[SPR_SPRG5] = regs.sprg5;
 560     env->spr[SPR_SPRG6] = regs.sprg6;
 561     env->spr[SPR_SPRG7] = regs.sprg7;
 562
 563     env->spr[SPR_BOOKE_PID] = regs.pid;
 564
 565     for (i = 0;i < 32; i++)
 566         env->gpr[i] = regs.gpr[i];
 567
 568     if (cap_booke_sregs) {
 569         ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
 570         if (ret < 0) {
 571             return ret;
 572         }
 573
 574         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 575             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 576             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 577             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 578             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
 579             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
 580             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
 581             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
 582             env->spr[SPR_DECR] = sregs.u.e.dec;
 583             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
 584             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
 585             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
 586         }
 587
 588         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
 589             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
 590             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
 591             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
 592             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
 593             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
 594         }
 595
 596         if (sregs.u.e.features & KVM_SREGS_E_64) {
 597             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
 598         }
 599
 600         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
 601             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
 602         }
 603
 604         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
 605             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
 606             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
 607             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
 608             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
 609             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
 610             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
 611             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
 612             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
 613             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
 614             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
 615             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
 616             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
 617             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
 618             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
 619             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
 620             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
 621
 622             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
 623                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
 624                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
 625                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
 626             }
 627
 628             if (sregs.u.e.features & KVM_SREGS_E_PM) {
 629                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
 630             }
 631
 632             if (sregs.u.e.features & KVM_SREGS_E_PC) {
 633                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
 634                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
 635             }
 636         }
 637
 638         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
 639             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
 640             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
 641             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
 642             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
 643             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
 644             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
 645             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
 646             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
 647             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
 648             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
 649         }
 650
 651         if (sregs.u.e.features & KVM_SREGS_EXP) {
 652             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
 653         }
 654
 655         if (sregs.u.e.features & KVM_SREGS_E_PD) {
 656             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
 657             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
 658         }
 659
 660         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
 661             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
 662             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
 663             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
 664
 665             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
 666                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
 667                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
 668             }
 669         }
 670     }
 671
 672     if (cap_segstate) {
 673         ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
 674         if (ret < 0) {
 675             return ret;
 676         }
 677
 678         ppc_store_sdr1(env, sregs.u.s.sdr1);
 679
 680         /* Sync SLB */
 681 #ifdef TARGET_PPC64
 682         for (i = 0; i < 64; i++) {
 683             ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
 684                                sregs.u.s.ppc64.slb[i].slbv);
 685         }
 686 #endif
 687
 688         /* Sync SRs */
 689         for (i = 0; i < 16; i++) {
 690             env->sr[i] = sregs.u.s.ppc32.sr[i];
 691         }
 692
 693         /* Sync BATs */
 694         for (i = 0; i < 8; i++) {
 695             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
 696             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
 697             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
 698             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
 699         }
 700     }
 701
 702     return 0;
 703 }
 704
 705 int kvmppc_set_interrupt(CPUPPCState *env, int irq, int level)
 706 {
 707     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
 708
 709     if (irq != PPC_INTERRUPT_EXT) {
 710         return 0;
 711     }
 712
 713     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
 714         return 0;
 715     }
 716
 717     kvm_vcpu_ioctl(env, KVM_INTERRUPT, &virq);
 718
 719     return 0;
 720 }
 721
 722 #if defined(TARGET_PPCEMB)
 723 #define PPC_INPUT_INT PPC40x_INPUT_INT
 724 #elif defined(TARGET_PPC64)
 725 #define PPC_INPUT_INT PPC970_INPUT_INT
 726 #else
 727 #define PPC_INPUT_INT PPC6xx_INPUT_INT
 728 #endif
 729
 730 void kvm_arch_pre_run(CPUPPCState *env, struct kvm_run *run)
 731 {
 732     int r;
 733     unsigned irq;
 734
 735     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
 736      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
 737     if (!cap_interrupt_level &&
 738         run->ready_for_interrupt_injection &&
 739         (env->interrupt_request & CPU_INTERRUPT_HARD) &&
 740         (env->irq_input_state & (1<<PPC_INPUT_INT)))
 741     {
 742         /* For now KVM disregards the 'irq' argument. However, in the
 743          * future KVM could cache it in-kernel to avoid a heavyweight exit
 744          * when reading the UIC.
 745          */
 746         irq = KVM_INTERRUPT_SET;
 747
 748         dprintf("injected interrupt %d\n", irq);
 749         r = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &irq);
 750         if (r < 0)
 751             printf("cpu %d fail inject %x\n", env->cpu_index, irq);
 752
 753         /* Always wake up soon in case the interrupt was level based */
 754         qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
 755                        (get_ticks_per_sec() / 50));
 756     }
 757
 758     /* We don't know if there are more interrupts pending after this. However,
 759      * the guest will return to userspace in the course of handling this one
 760      * anyways, so we will get a chance to deliver the rest. */
 761 }
 762
 763 void kvm_arch_post_run(CPUPPCState *env, struct kvm_run *run)
 764 {
 765 }
 766
 767 int kvm_arch_process_async_events(CPUPPCState *env)
 768 {
 769     return env->halted;
 770 }
 771
 772 static int kvmppc_handle_halt(CPUPPCState *env)
 773 {
 774     if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
 775         env->halted = 1;
 776         env->exception_index = EXCP_HLT;
 777     }
 778
 779     return 0;
 780 }
 781
 782 /* map dcr access to existing qemu dcr emulation */
 783 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
 784 {
 785     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
 786         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
 787
 788     return 0;
 789 }
 790
 791 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
 792 {
 793     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
 794         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
 795
 796     return 0;
 797 }
 798
 799 int kvm_arch_handle_exit(CPUPPCState *env, struct kvm_run *run)
 800 {
 801     int ret;
 802
 803     switch (run->exit_reason) {
 804     case KVM_EXIT_DCR:
 805         if (run->dcr.is_write) {
 806             dprintf("handle dcr write\n");
 807             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
 808         } else {
 809             dprintf("handle dcr read\n");
 810             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
 811         }
 812         break;
 813     case KVM_EXIT_HLT:
 814         dprintf("handle halt\n");
 815         ret = kvmppc_handle_halt(env);
 816         break;
 817 #ifdef CONFIG_PSERIES
 818     case KVM_EXIT_PAPR_HCALL:
 819         dprintf("handle PAPR hypercall\n");
 820         run->papr_hcall.ret = spapr_hypercall(ppc_env_get_cpu(env),
 821                                               run->papr_hcall.nr,
 822                                               run->papr_hcall.args);
 823         ret = 0;
 824         break;
 825 #endif
 826     default:
 827         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
 828         ret = -1;
 829         break;
 830     }
 831
 832     return ret;
 833 }
 834
 835 static int read_cpuinfo(const char *field, char *value, int len)
 836 {
 837     FILE *f;
 838     int ret = -1;
 839     int field_len = strlen(field);
 840     char line[512];
 841
 842     f = fopen("/proc/cpuinfo", "r");
 843     if (!f) {
 844         return -1;
 845     }
 846
 847     do {
 848         if(!fgets(line, sizeof(line), f)) {
 849             break;
 850         }
 851         if (!strncmp(line, field, field_len)) {
 852             pstrcpy(value, len, line);
 853             ret = 0;
 854             break;
 855         }
 856     } while(*line);
 857
 858     fclose(f);
 859
 860     return ret;
 861 }
 862
 863 uint32_t kvmppc_get_tbfreq(void)
 864 {
 865     char line[512];
 866     char *ns;
 867     uint32_t retval = get_ticks_per_sec();
 868
 869     if (read_cpuinfo("timebase", line, sizeof(line))) {
 870         return retval;
 871     }
 872
 873     if (!(ns = strchr(line, ':'))) {
 874         return retval;
 875     }
 876
 877     ns++;
 878
 879     retval = atoi(ns);
 880     return retval;
 881 }
 882
 883 /* Try to find a device tree node for a CPU with clock-frequency property */
 884 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
 885 {
 886     struct dirent *dirp;
 887     DIR *dp;
 888
 889     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
 890         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
 891         return -1;
 892     }
 893
 894     buf[0] = '\0';
 895     while ((dirp = readdir(dp)) != NULL) {
 896         FILE *f;
 897         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
 898                  dirp->d_name);
 899         f = fopen(buf, "r");
 900         if (f) {
 901             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
 902             fclose(f);
 903             break;
 904         }
 905         buf[0] = '\0';
 906     }
 907     closedir(dp);
 908     if (buf[0] == '\0') {
 909         printf("Unknown host!\n");
 910         return -1;
 911     }
 912
 913     return 0;
 914 }
 915
 916 /* Read a CPU node property from the host device tree that's a single
 917  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
 918  * (can't find or open the property, or doesn't understand the
 919  * format) */
 920 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
 921 {
 922     char buf[PATH_MAX];
 923     union {
 924         uint32_t v32;
 925         uint64_t v64;
 926     } u;
 927     FILE *f;
 928     int len;
 929
 930     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
 931         return -1;
 932     }
 933
 934     strncat(buf, "/", sizeof(buf) - strlen(buf));
 935     strncat(buf, propname, sizeof(buf) - strlen(buf));
 936
 937     f = fopen(buf, "rb");
 938     if (!f) {
 939         return -1;
 940     }
 941
 942     len = fread(&u, 1, sizeof(u), f);
 943     fclose(f);
 944     switch (len) {
 945     case 4:
 946         /* property is a 32-bit quantity */
 947         return be32_to_cpu(u.v32);
 948     case 8:
 949         return be64_to_cpu(u.v64);
 950     }
 951
 952     return 0;
 953 }
 954
 955 uint64_t kvmppc_get_clockfreq(void)
 956 {
 957     return kvmppc_read_int_cpu_dt("clock-frequency");
 958 }
 959
 960 uint32_t kvmppc_get_vmx(void)
 961 {
 962     return kvmppc_read_int_cpu_dt("ibm,vmx");
 963 }
 964
 965 uint32_t kvmppc_get_dfp(void)
 966 {
 967     return kvmppc_read_int_cpu_dt("ibm,dfp");
 968 }
 969
 970 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
 971 {
 972     uint32_t *hc = (uint32_t*)buf;
 973
 974     struct kvm_ppc_pvinfo pvinfo;
 975
 976     if (kvm_check_extension(env->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
 977         !kvm_vm_ioctl(env->kvm_state, KVM_PPC_GET_PVINFO, &pvinfo)) {
 978         memcpy(buf, pvinfo.hcall, buf_len);
 979
 980         return 0;
 981     }
 982
 983     /*
 984      * Fallback to always fail hypercalls:
 985      *
 986      *     li r3, -1
 987      *     nop
 988      *     nop
 989      *     nop
 990      */
 991
 992     hc[0] = 0x3860ffff;
 993     hc[1] = 0x60000000;
 994     hc[2] = 0x60000000;
 995     hc[3] = 0x60000000;
 996
 997     return 0;
 998 }
 999
1000 void kvmppc_set_papr(CPUPPCState *env)
1001 {
1002     struct kvm_enable_cap cap = {};
1003     int ret;
1004
1005     cap.cap = KVM_CAP_PPC_PAPR;
1006     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
1007
1008     if (ret) {
1009         cpu_abort(env, "This KVM version does not support PAPR\n");
1010     }
1011 }
1012
1013 int kvmppc_smt_threads(void)
1014 {
1015     return cap_ppc_smt ? cap_ppc_smt : 1;
1016 }
1017
1018 #ifdef TARGET_PPC64
1019 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1020 {
1021     void *rma;
1022     off_t size;
1023     int fd;
1024     struct kvm_allocate_rma ret;
1025     MemoryRegion *rma_region;
1026
1027     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1028      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1029      *                      not necessary on this hardware
1030      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1031      *
1032      * FIXME: We should allow the user to force contiguous RMA
1033      * allocation in the cap_ppc_rma==1 case.
1034      */
1035     if (cap_ppc_rma < 2) {
1036         return 0;
1037     }
1038
1039     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1040     if (fd < 0) {
1041         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1042                 strerror(errno));
1043         return -1;
1044     }
1045
1046     size = MIN(ret.rma_size, 256ul << 20);
1047
1048     rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1049     if (rma == MAP_FAILED) {
1050         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1051         return -1;
1052     };
1053
1054     rma_region = g_new(MemoryRegion, 1);
1055     memory_region_init_ram_ptr(rma_region, name, size, rma);
1056     vmstate_register_ram_global(rma_region);
1057     memory_region_add_subregion(sysmem, 0, rma_region);
1058
1059     return size;
1060 }
1061
1062 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1063 {
1064     if (cap_ppc_rma >= 2) {
1065         return current_size;
1066     }
1067     return MIN(current_size,
1068                getrampagesize() << (hash_shift - 7));
1069 }
1070 #endif
1071
1072 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1073 {
1074     struct kvm_create_spapr_tce args = {
1075         .liobn = liobn,
1076         .window_size = window_size,
1077     };
1078     long len;
1079     int fd;
1080     void *table;
1081
1082     /* Must set fd to -1 so we don't try to munmap when called for
1083      * destroying the table, which the upper layers -will- do
1084      */
1085     *pfd = -1;
1086     if (!cap_spapr_tce) {
1087         return NULL;
1088     }
1089
1090     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1091     if (fd < 0) {
1092         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1093                 liobn);
1094         return NULL;
1095     }
1096
1097     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1098     /* FIXME: round this up to page size */
1099
1100     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1101     if (table == MAP_FAILED) {
1102         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1103                 liobn);
1104         close(fd);
1105         return NULL;
1106     }
1107
1108     *pfd = fd;
1109     return table;
1110 }
1111
1112 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1113 {
1114     long len;
1115
1116     if (fd < 0) {
1117         return -1;
1118     }
1119
1120     len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1121     if ((munmap(table, len) < 0) ||
1122         (close(fd) < 0)) {
1123         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1124                 strerror(errno));
1125         /* Leak the table */
1126     }
1127
1128     return 0;
1129 }
1130
1131 int kvmppc_reset_htab(int shift_hint)
1132 {
1133     uint32_t shift = shift_hint;
1134
1135     if (!kvm_enabled()) {
1136         /* Full emulation, tell caller to allocate htab itself */
1137         return 0;
1138     }
1139     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1140         int ret;
1141         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1142         if (ret == -ENOTTY) {
1143             /* At least some versions of PR KVM advertise the
1144              * capability, but don't implement the ioctl().  Oops.
1145              * Return 0 so that we allocate the htab in qemu, as is
1146              * correct for PR. */
1147             return 0;
1148         } else if (ret < 0) {
1149             return ret;
1150         }
1151         return shift;
1152     }
1153
1154     /* We have a kernel that predates the htab reset calls.  For PR
1155      * KVM, we need to allocate the htab ourselves, for an HV KVM of
1156      * this era, it has allocated a 16MB fixed size hash table
1157      * already.  Kernels of this era have the GET_PVINFO capability
1158      * only on PR, so we use this hack to determine the right
1159      * answer */
1160     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1161         /* PR - tell caller to allocate htab */
1162         return 0;
1163     } else {
1164         /* HV - assume 16MB kernel allocated htab */
1165         return 24;
1166     }
1167 }
1168
1169 static inline uint32_t mfpvr(void)
1170 {
1171     uint32_t pvr;
1172
1173     asm ("mfpvr %0"
1174          : "=r"(pvr));
1175     return pvr;
1176 }
1177
/* Set (on == true) or clear (on == false) the bits given by flags in
 * *word, leaving all other bits untouched. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1186
1187 const ppc_def_t *kvmppc_host_cpu_def(void)
1188 {
1189     uint32_t host_pvr = mfpvr();
1190     const ppc_def_t *base_spec;
1191     ppc_def_t *spec;
1192     uint32_t vmx = kvmppc_get_vmx();
1193     uint32_t dfp = kvmppc_get_dfp();
1194
1195     base_spec = ppc_find_by_pvr(host_pvr);
1196
1197     spec = g_malloc0(sizeof(*spec));
1198     memcpy(spec, base_spec, sizeof(*spec));
1199
1200     /* Now fix up the spec with information we can query from the host */
1201
1202     if (vmx != -1) {
1203         /* Only override when we know what the host supports */
1204         alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1205         alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1206     }
1207     if (dfp != -1) {
1208         /* Only override when we know what the host supports */
1209         alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1210     }
1211
1212     return spec;
1213 }
1214
1215 int kvmppc_fixup_cpu(CPUPPCState *env)
1216 {
1217     int smt;
1218
1219     /* Adjust cpu index for SMT */
1220     smt = kvmppc_smt_threads();
1221     env->cpu_index = (env->cpu_index / smp_threads) * smt
1222         + (env->cpu_index % smp_threads);
1223
1224     return 0;
1225 }
1226
1227
1228 bool kvm_arch_stop_on_emulation_error(CPUPPCState *env)
1229 {
1230     return true;
1231 }
1232
1233 int kvm_arch_on_sigbus_vcpu(CPUPPCState *env, int code, void *addr)
1234 {
1235     return 1;
1236 }
1237
/* Always returns 1; neither argument is consulted. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}