target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/timer.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "sysemu/cpus.h"
  32 #include "sysemu/device_tree.h"
  33 #include "hw/sysbus.h"
  34 #include "hw/spapr.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/spapr.h"
  38 #include "hw/spapr_vio.h"
  39
  40 //#define DEBUG_KVM
  41
  42 #ifdef DEBUG_KVM
  43 #define dprintf(fmt, ...) \
  44     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  45 #else
  46 #define dprintf(fmt, ...) \
  47     do { } while (0)
  48 #endif
  49
  50 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  51
  52 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  53     KVM_CAP_LAST_INFO
  54 };
  55
  56 static int cap_interrupt_unset = false;
  57 static int cap_interrupt_level = false;
  58 static int cap_segstate;
  59 static int cap_booke_sregs;
  60 static int cap_ppc_smt;
  61 static int cap_ppc_rma;
  62 static int cap_spapr_tce;
  63 static int cap_hior;
  64
  65 /* XXX We have a race condition where we actually have a level triggered
  66  *     interrupt, but the infrastructure can't expose that yet, so the guest
  67  *     takes but ignores it, goes to sleep and never gets notified that there's
  68  *     still an interrupt pending.
  69  *
  70  *     As a quick workaround, let's just wake up again 20 ms after we injected
  71  *     an interrupt. That way we can assure that we're always reinjecting
  72  *     interrupts in case the guest swallowed them.
  73  */
  74 static QEMUTimer *idle_timer;
  75
  76 static void kvm_kick_cpu(void *opaque)
  77 {
  78     PowerPCCPU *cpu = opaque;
  79
  80     qemu_cpu_kick(CPU(cpu));
  81 }
  82
  83 int kvm_arch_init(KVMState *s)
  84 {
  85     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
  86     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
  87     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
  88     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
  89     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
  90     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
  91     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
  92     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
  93
  94     if (!cap_interrupt_level) {
  95         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
  96                         "VM to stall at times!\n");
  97     }
  98
  99     return 0;
 100 }
 101
 102 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 103 {
 104     CPUPPCState *cenv = &cpu->env;
 105     CPUState *cs = CPU(cpu);
 106     struct kvm_sregs sregs;
 107     int ret;
 108
 109     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 110         /* What we're really trying to say is "if we're on BookE, we use
 111            the native PVR for now". This is the only sane way to check
 112            it though, so we potentially confuse users that they can run
 113            BookE guests on BookS. Let's hope nobody dares enough :) */
 114         return 0;
 115     } else {
 116         if (!cap_segstate) {
 117             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 118             return -ENOSYS;
 119         }
 120     }
 121
 122     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 123     if (ret) {
 124         return ret;
 125     }
 126
 127     sregs.pvr = cenv->spr[SPR_PVR];
 128     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 129 }
 130
 131 /* Set up a shared TLB array with KVM */
 132 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 133 {
 134     CPUPPCState *env = &cpu->env;
 135     CPUState *cs = CPU(cpu);
 136     struct kvm_book3e_206_tlb_params params = {};
 137     struct kvm_config_tlb cfg = {};
 138     struct kvm_enable_cap encap = {};
 139     unsigned int entries = 0;
 140     int ret, i;
 141
 142     if (!kvm_enabled() ||
 143         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 144         return 0;
 145     }
 146
 147     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 148
 149     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 150         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 151         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 152         entries += params.tlb_sizes[i];
 153     }
 154
 155     assert(entries == env->nb_tlb);
 156     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 157
 158     env->tlb_dirty = true;
 159
 160     cfg.array = (uintptr_t)env->tlb.tlbm;
 161     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 162     cfg.params = (uintptr_t)&params;
 163     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 164
 165     encap.cap = KVM_CAP_SW_TLB;
 166     encap.args[0] = (uintptr_t)&cfg;
 167
 168     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
 169     if (ret < 0) {
 170         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 171                 __func__, strerror(-ret));
 172         return ret;
 173     }
 174
 175     env->kvm_sw_tlb = true;
 176     return 0;
 177 }
 178
 179
 180 #if defined(TARGET_PPC64)
 181 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 182                                        struct kvm_ppc_smmu_info *info)
 183 {
 184     CPUPPCState *env = &cpu->env;
 185     CPUState *cs = CPU(cpu);
 186
 187     memset(info, 0, sizeof(*info));
 188
 189     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 190      * need to "guess" what the supported page sizes are.
 191      *
 192      * For that to work we make a few assumptions:
 193      *
 194      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 195      *   KVM which only supports 4K and 16M pages, but supports them
 196      *   regardless of the backing store characteritics. We also don't
 197      *   support 1T segments.
 198      *
 199      *   This is safe as if HV KVM ever supports that capability or PR
 200      *   KVM grows supports for more page/segment sizes, those versions
 201      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 202      *   will not hit this fallback
 203      *
 204      * - Else we are running HV KVM. This means we only support page
 205      *   sizes that fit in the backing store. Additionally we only
 206      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 207      *   P7 encodings for the SLB and hash table. Here too, we assume
 208      *   support for any newer processor will mean a kernel that
 209      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 210      *   this fallback.
 211      */
 212     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 213         /* No flags */
 214         info->flags = 0;
 215         info->slb_size = 64;
 216
 217         /* Standard 4k base page size segment */
 218         info->sps[0].page_shift = 12;
 219         info->sps[0].slb_enc = 0;
 220         info->sps[0].enc[0].page_shift = 12;
 221         info->sps[0].enc[0].pte_enc = 0;
 222
 223         /* Standard 16M large page size segment */
 224         info->sps[1].page_shift = 24;
 225         info->sps[1].slb_enc = SLB_VSID_L;
 226         info->sps[1].enc[0].page_shift = 24;
 227         info->sps[1].enc[0].pte_enc = 0;
 228     } else {
 229         int i = 0;
 230
 231         /* HV KVM has backing store size restrictions */
 232         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 233
 234         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 235             info->flags |= KVM_PPC_1T_SEGMENTS;
 236         }
 237
 238         if (env->mmu_model == POWERPC_MMU_2_06) {
 239             info->slb_size = 32;
 240         } else {
 241             info->slb_size = 64;
 242         }
 243
 244         /* Standard 4k base page size segment */
 245         info->sps[i].page_shift = 12;
 246         info->sps[i].slb_enc = 0;
 247         info->sps[i].enc[0].page_shift = 12;
 248         info->sps[i].enc[0].pte_enc = 0;
 249         i++;
 250
 251         /* 64K on MMU 2.06 */
 252         if (env->mmu_model == POWERPC_MMU_2_06) {
 253             info->sps[i].page_shift = 16;
 254             info->sps[i].slb_enc = 0x110;
 255             info->sps[i].enc[0].page_shift = 16;
 256             info->sps[i].enc[0].pte_enc = 1;
 257             i++;
 258         }
 259
 260         /* Standard 16M large page size segment */
 261         info->sps[i].page_shift = 24;
 262         info->sps[i].slb_enc = SLB_VSID_L;
 263         info->sps[i].enc[0].page_shift = 24;
 264         info->sps[i].enc[0].pte_enc = 0;
 265     }
 266 }
 267
 268 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 269 {
 270     CPUState *cs = CPU(cpu);
 271     int ret;
 272
 273     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 274         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 275         if (ret == 0) {
 276             return;
 277         }
 278     }
 279
 280     kvm_get_fallback_smmu_info(cpu, info);
 281 }
 282
 283 static long getrampagesize(void)
 284 {
 285     struct statfs fs;
 286     int ret;
 287
 288     if (!mem_path) {
 289         /* guest RAM is backed by normal anonymous pages */
 290         return getpagesize();
 291     }
 292
 293     do {
 294         ret = statfs(mem_path, &fs);
 295     } while (ret != 0 && errno == EINTR);
 296
 297     if (ret != 0) {
 298         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 299                 strerror(errno));
 300         exit(1);
 301     }
 302
 303 #define HUGETLBFS_MAGIC       0x958458f6
 304
 305     if (fs.f_type != HUGETLBFS_MAGIC) {
 306         /* Explicit mempath, but it's ordinary pages */
 307         return getpagesize();
 308     }
 309
 310     /* It's hugepage, return the huge page size */
 311     return fs.f_bsize;
 312 }
 313
 314 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 315 {
 316     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 317         return true;
 318     }
 319
 320     return (1ul << shift) <= rampgsize;
 321 }
 322
 323 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 324 {
 325     static struct kvm_ppc_smmu_info smmu_info;
 326     static bool has_smmu_info;
 327     CPUPPCState *env = &cpu->env;
 328     long rampagesize;
 329     int iq, ik, jq, jk;
 330
 331     /* We only handle page sizes for 64-bit server guests for now */
 332     if (!(env->mmu_model & POWERPC_MMU_64)) {
 333         return;
 334     }
 335
 336     /* Collect MMU info from kernel if not already */
 337     if (!has_smmu_info) {
 338         kvm_get_smmu_info(cpu, &smmu_info);
 339         has_smmu_info = true;
 340     }
 341
 342     rampagesize = getrampagesize();
 343
 344     /* Convert to QEMU form */
 345     memset(&env->sps, 0, sizeof(env->sps));
 346
 347     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 348         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 349         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 350
 351         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 352                                  ksps->page_shift)) {
 353             continue;
 354         }
 355         qsps->page_shift = ksps->page_shift;
 356         qsps->slb_enc = ksps->slb_enc;
 357         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 358             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 359                                      ksps->enc[jk].page_shift)) {
 360                 continue;
 361             }
 362             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 363             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 364             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 365                 break;
 366             }
 367         }
 368         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 369             break;
 370         }
 371     }
 372     env->slb_nr = smmu_info.slb_size;
 373     if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
 374         env->mmu_model |= POWERPC_MMU_1TSEG;
 375     } else {
 376         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 377     }
 378 }
 379 #else /* defined (TARGET_PPC64) */
 380
 381 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 382 {
 383 }
 384
 385 #endif /* !defined (TARGET_PPC64) */
 386
 387 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 388 {
 389     return cpu->cpu_index;
 390 }
 391
 392 int kvm_arch_init_vcpu(CPUState *cs)
 393 {
 394     PowerPCCPU *cpu = POWERPC_CPU(cs);
 395     CPUPPCState *cenv = &cpu->env;
 396     int ret;
 397
 398     /* Gather server mmu info from KVM and update the CPU state */
 399     kvm_fixup_page_sizes(cpu);
 400
 401     /* Synchronize sregs with kvm */
 402     ret = kvm_arch_sync_sregs(cpu);
 403     if (ret) {
 404         return ret;
 405     }
 406
 407     idle_timer = qemu_new_timer_ns(vm_clock, kvm_kick_cpu, cpu);
 408
 409     /* Some targets support access to KVM's guest TLB. */
 410     switch (cenv->mmu_model) {
 411     case POWERPC_MMU_BOOKE206:
 412         ret = kvm_booke206_tlb_init(cpu);
 413         break;
 414     default:
 415         break;
 416     }
 417
 418     return ret;
 419 }
 420
 421 void kvm_arch_reset_vcpu(CPUState *cpu)
 422 {
 423 }
 424
 425 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 426 {
 427     CPUPPCState *env = &cpu->env;
 428     CPUState *cs = CPU(cpu);
 429     struct kvm_dirty_tlb dirty_tlb;
 430     unsigned char *bitmap;
 431     int ret;
 432
 433     if (!env->kvm_sw_tlb) {
 434         return;
 435     }
 436
 437     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 438     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 439
 440     dirty_tlb.bitmap = (uintptr_t)bitmap;
 441     dirty_tlb.num_dirty = env->nb_tlb;
 442
 443     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 444     if (ret) {
 445         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 446                 __func__, strerror(-ret));
 447     }
 448
 449     g_free(bitmap);
 450 }
 451
 452 int kvm_arch_put_registers(CPUState *cs, int level)
 453 {
 454     PowerPCCPU *cpu = POWERPC_CPU(cs);
 455     CPUPPCState *env = &cpu->env;
 456     struct kvm_regs regs;
 457     int ret;
 458     int i;
 459
 460     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 461     if (ret < 0) {
 462         return ret;
 463     }
 464
 465     regs.ctr = env->ctr;
 466     regs.lr  = env->lr;
 467     regs.xer = cpu_read_xer(env);
 468     regs.msr = env->msr;
 469     regs.pc = env->nip;
 470
 471     regs.srr0 = env->spr[SPR_SRR0];
 472     regs.srr1 = env->spr[SPR_SRR1];
 473
 474     regs.sprg0 = env->spr[SPR_SPRG0];
 475     regs.sprg1 = env->spr[SPR_SPRG1];
 476     regs.sprg2 = env->spr[SPR_SPRG2];
 477     regs.sprg3 = env->spr[SPR_SPRG3];
 478     regs.sprg4 = env->spr[SPR_SPRG4];
 479     regs.sprg5 = env->spr[SPR_SPRG5];
 480     regs.sprg6 = env->spr[SPR_SPRG6];
 481     regs.sprg7 = env->spr[SPR_SPRG7];
 482
 483     regs.pid = env->spr[SPR_BOOKE_PID];
 484
 485     for (i = 0;i < 32; i++)
 486         regs.gpr[i] = env->gpr[i];
 487
 488     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 489     if (ret < 0)
 490         return ret;
 491
 492     if (env->tlb_dirty) {
 493         kvm_sw_tlb_put(cpu);
 494         env->tlb_dirty = false;
 495     }
 496
 497     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 498         struct kvm_sregs sregs;
 499
 500         sregs.pvr = env->spr[SPR_PVR];
 501
 502         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 503
 504         /* Sync SLB */
 505 #ifdef TARGET_PPC64
 506         for (i = 0; i < 64; i++) {
 507             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 508             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 509         }
 510 #endif
 511
 512         /* Sync SRs */
 513         for (i = 0; i < 16; i++) {
 514             sregs.u.s.ppc32.sr[i] = env->sr[i];
 515         }
 516
 517         /* Sync BATs */
 518         for (i = 0; i < 8; i++) {
 519             /* Beware. We have to swap upper and lower bits here */
 520             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 521                 | env->DBAT[1][i];
 522             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 523                 | env->IBAT[1][i];
 524         }
 525
 526         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 527         if (ret) {
 528             return ret;
 529         }
 530     }
 531
 532     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 533         uint64_t hior = env->spr[SPR_HIOR];
 534         struct kvm_one_reg reg = {
 535             .id = KVM_REG_PPC_HIOR,
 536             .addr = (uintptr_t) &hior,
 537         };
 538
 539         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 540         if (ret) {
 541             return ret;
 542         }
 543     }
 544
 545     return ret;
 546 }
 547
 548 int kvm_arch_get_registers(CPUState *cs)
 549 {
 550     PowerPCCPU *cpu = POWERPC_CPU(cs);
 551     CPUPPCState *env = &cpu->env;
 552     struct kvm_regs regs;
 553     struct kvm_sregs sregs;
 554     uint32_t cr;
 555     int i, ret;
 556
 557     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 558     if (ret < 0)
 559         return ret;
 560
 561     cr = regs.cr;
 562     for (i = 7; i >= 0; i--) {
 563         env->crf[i] = cr & 15;
 564         cr >>= 4;
 565     }
 566
 567     env->ctr = regs.ctr;
 568     env->lr = regs.lr;
 569     cpu_write_xer(env, regs.xer);
 570     env->msr = regs.msr;
 571     env->nip = regs.pc;
 572
 573     env->spr[SPR_SRR0] = regs.srr0;
 574     env->spr[SPR_SRR1] = regs.srr1;
 575
 576     env->spr[SPR_SPRG0] = regs.sprg0;
 577     env->spr[SPR_SPRG1] = regs.sprg1;
 578     env->spr[SPR_SPRG2] = regs.sprg2;
 579     env->spr[SPR_SPRG3] = regs.sprg3;
 580     env->spr[SPR_SPRG4] = regs.sprg4;
 581     env->spr[SPR_SPRG5] = regs.sprg5;
 582     env->spr[SPR_SPRG6] = regs.sprg6;
 583     env->spr[SPR_SPRG7] = regs.sprg7;
 584
 585     env->spr[SPR_BOOKE_PID] = regs.pid;
 586
 587     for (i = 0;i < 32; i++)
 588         env->gpr[i] = regs.gpr[i];
 589
 590     if (cap_booke_sregs) {
 591         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 592         if (ret < 0) {
 593             return ret;
 594         }
 595
 596         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 597             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 598             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 599             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 600             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
 601             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
 602             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
 603             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
 604             env->spr[SPR_DECR] = sregs.u.e.dec;
 605             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
 606             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
 607             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
 608         }
 609
 610         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
 611             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
 612             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
 613             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
 614             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
 615             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
 616         }
 617
 618         if (sregs.u.e.features & KVM_SREGS_E_64) {
 619             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
 620         }
 621
 622         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
 623             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
 624         }
 625
 626         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
 627             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
 628             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
 629             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
 630             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
 631             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
 632             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
 633             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
 634             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
 635             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
 636             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
 637             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
 638             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
 639             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
 640             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
 641             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
 642             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
 643
 644             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
 645                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
 646                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
 647                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
 648             }
 649
 650             if (sregs.u.e.features & KVM_SREGS_E_PM) {
 651                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
 652             }
 653
 654             if (sregs.u.e.features & KVM_SREGS_E_PC) {
 655                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
 656                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
 657             }
 658         }
 659
 660         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
 661             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
 662             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
 663             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
 664             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
 665             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
 666             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
 667             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
 668             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
 669             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
 670             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
 671         }
 672
 673         if (sregs.u.e.features & KVM_SREGS_EXP) {
 674             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
 675         }
 676
 677         if (sregs.u.e.features & KVM_SREGS_E_PD) {
 678             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
 679             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
 680         }
 681
 682         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
 683             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
 684             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
 685             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
 686
 687             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
 688                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
 689                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
 690             }
 691         }
 692     }
 693
 694     if (cap_segstate) {
 695         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 696         if (ret < 0) {
 697             return ret;
 698         }
 699
 700         ppc_store_sdr1(env, sregs.u.s.sdr1);
 701
 702         /* Sync SLB */
 703 #ifdef TARGET_PPC64
 704         for (i = 0; i < 64; i++) {
 705             ppc_store_slb(env, sregs.u.s.ppc64.slb[i].slbe,
 706                                sregs.u.s.ppc64.slb[i].slbv);
 707         }
 708 #endif
 709
 710         /* Sync SRs */
 711         for (i = 0; i < 16; i++) {
 712             env->sr[i] = sregs.u.s.ppc32.sr[i];
 713         }
 714
 715         /* Sync BATs */
 716         for (i = 0; i < 8; i++) {
 717             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
 718             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
 719             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
 720             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
 721         }
 722     }
 723
 724     return 0;
 725 }
 726
 727 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
 728 {
 729     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
 730
 731     if (irq != PPC_INTERRUPT_EXT) {
 732         return 0;
 733     }
 734
 735     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
 736         return 0;
 737     }
 738
 739     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
 740
 741     return 0;
 742 }
 743
 744 #if defined(TARGET_PPCEMB)
 745 #define PPC_INPUT_INT PPC40x_INPUT_INT
 746 #elif defined(TARGET_PPC64)
 747 #define PPC_INPUT_INT PPC970_INPUT_INT
 748 #else
 749 #define PPC_INPUT_INT PPC6xx_INPUT_INT
 750 #endif
 751
 752 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 753 {
 754     PowerPCCPU *cpu = POWERPC_CPU(cs);
 755     CPUPPCState *env = &cpu->env;
 756     int r;
 757     unsigned irq;
 758
 759     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
 760      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
 761     if (!cap_interrupt_level &&
 762         run->ready_for_interrupt_injection &&
 763         (env->interrupt_request & CPU_INTERRUPT_HARD) &&
 764         (env->irq_input_state & (1<<PPC_INPUT_INT)))
 765     {
 766         /* For now KVM disregards the 'irq' argument. However, in the
 767          * future KVM could cache it in-kernel to avoid a heavyweight exit
 768          * when reading the UIC.
 769          */
 770         irq = KVM_INTERRUPT_SET;
 771
 772         dprintf("injected interrupt %d\n", irq);
 773         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
 774         if (r < 0) {
 775             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
 776         }
 777
 778         /* Always wake up soon in case the interrupt was level based */
 779         qemu_mod_timer(idle_timer, qemu_get_clock_ns(vm_clock) +
 780                        (get_ticks_per_sec() / 50));
 781     }
 782
 783     /* We don't know if there are more interrupts pending after this. However,
 784      * the guest will return to userspace in the course of handling this one
 785      * anyways, so we will get a chance to deliver the rest. */
 786 }
 787
 788 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
 789 {
 790 }
 791
 792 int kvm_arch_process_async_events(CPUState *cs)
 793 {
 794     PowerPCCPU *cpu = POWERPC_CPU(cs);
 795     return cpu->env.halted;
 796 }
 797
 798 static int kvmppc_handle_halt(CPUPPCState *env)
 799 {
 800     if (!(env->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
 801         env->halted = 1;
 802         env->exception_index = EXCP_HLT;
 803     }
 804
 805     return 0;
 806 }
 807
 808 /* map dcr access to existing qemu dcr emulation */
 809 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
 810 {
 811     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
 812         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
 813
 814     return 0;
 815 }
 816
 817 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
 818 {
 819     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
 820         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
 821
 822     return 0;
 823 }
 824
 825 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 826 {
 827     PowerPCCPU *cpu = POWERPC_CPU(cs);
 828     CPUPPCState *env = &cpu->env;
 829     int ret;
 830
 831     switch (run->exit_reason) {
 832     case KVM_EXIT_DCR:
 833         if (run->dcr.is_write) {
 834             dprintf("handle dcr write\n");
 835             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
 836         } else {
 837             dprintf("handle dcr read\n");
 838             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
 839         }
 840         break;
 841     case KVM_EXIT_HLT:
 842         dprintf("handle halt\n");
 843         ret = kvmppc_handle_halt(env);
 844         break;
 845 #ifdef CONFIG_PSERIES
 846     case KVM_EXIT_PAPR_HCALL:
 847         dprintf("handle PAPR hypercall\n");
 848         run->papr_hcall.ret = spapr_hypercall(cpu,
 849                                               run->papr_hcall.nr,
 850                                               run->papr_hcall.args);
 851         ret = 0;
 852         break;
 853 #endif
 854     case KVM_EXIT_EPR:
 855         dprintf("handle epr\n");
 856         run->epr.epr = ldl_phys(env->mpic_iack);
 857         ret = 0;
 858         break;
 859     default:
 860         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
 861         ret = -1;
 862         break;
 863     }
 864
 865     return ret;
 866 }
 867
 868 static int read_cpuinfo(const char *field, char *value, int len)
 869 {
 870     FILE *f;
 871     int ret = -1;
 872     int field_len = strlen(field);
 873     char line[512];
 874
 875     f = fopen("/proc/cpuinfo", "r");
 876     if (!f) {
 877         return -1;
 878     }
 879
 880     do {
 881         if(!fgets(line, sizeof(line), f)) {
 882             break;
 883         }
 884         if (!strncmp(line, field, field_len)) {
 885             pstrcpy(value, len, line);
 886             ret = 0;
 887             break;
 888         }
 889     } while(*line);
 890
 891     fclose(f);
 892
 893     return ret;
 894 }
 895
 896 uint32_t kvmppc_get_tbfreq(void)
 897 {
 898     char line[512];
 899     char *ns;
 900     uint32_t retval = get_ticks_per_sec();
 901
 902     if (read_cpuinfo("timebase", line, sizeof(line))) {
 903         return retval;
 904     }
 905
 906     if (!(ns = strchr(line, ':'))) {
 907         return retval;
 908     }
 909
 910     ns++;
 911
 912     retval = atoi(ns);
 913     return retval;
 914 }
 915
 916 /* Try to find a device tree node for a CPU with clock-frequency property */
 917 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
 918 {
 919     struct dirent *dirp;
 920     DIR *dp;
 921
 922     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
 923         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
 924         return -1;
 925     }
 926
 927     buf[0] = '\0';
 928     while ((dirp = readdir(dp)) != NULL) {
 929         FILE *f;
 930         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
 931                  dirp->d_name);
 932         f = fopen(buf, "r");
 933         if (f) {
 934             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
 935             fclose(f);
 936             break;
 937         }
 938         buf[0] = '\0';
 939     }
 940     closedir(dp);
 941     if (buf[0] == '\0') {
 942         printf("Unknown host!\n");
 943         return -1;
 944     }
 945
 946     return 0;
 947 }
 948
 949 /* Read a CPU node property from the host device tree that's a single
 950  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
 951  * (can't find or open the property, or doesn't understand the
 952  * format) */
 953 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
 954 {
 955     char buf[PATH_MAX];
 956     union {
 957         uint32_t v32;
 958         uint64_t v64;
 959     } u;
 960     FILE *f;
 961     int len;
 962
 963     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
 964         return -1;
 965     }
 966
 967     strncat(buf, "/", sizeof(buf) - strlen(buf));
 968     strncat(buf, propname, sizeof(buf) - strlen(buf));
 969
 970     f = fopen(buf, "rb");
 971     if (!f) {
 972         return -1;
 973     }
 974
 975     len = fread(&u, 1, sizeof(u), f);
 976     fclose(f);
 977     switch (len) {
 978     case 4:
 979         /* property is a 32-bit quantity */
 980         return be32_to_cpu(u.v32);
 981     case 8:
 982         return be64_to_cpu(u.v64);
 983     }
 984
 985     return 0;
 986 }
 987
 988 uint64_t kvmppc_get_clockfreq(void)
 989 {
 990     return kvmppc_read_int_cpu_dt("clock-frequency");
 991 }
 992
 993 uint32_t kvmppc_get_vmx(void)
 994 {
 995     return kvmppc_read_int_cpu_dt("ibm,vmx");
 996 }
 997
 998 uint32_t kvmppc_get_dfp(void)
 999 {
1000     return kvmppc_read_int_cpu_dt("ibm,dfp");
1001 }
1002
1003 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1004  {
1005      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1006      CPUState *cs = CPU(cpu);
1007
1008     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1009         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1010         return 0;
1011     }
1012
1013     return 1;
1014 }
1015
1016 int kvmppc_get_hasidle(CPUPPCState *env)
1017 {
1018     struct kvm_ppc_pvinfo pvinfo;
1019
1020     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1021         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1022         return 1;
1023     }
1024
1025     return 0;
1026 }
1027
1028 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1029 {
1030     uint32_t *hc = (uint32_t*)buf;
1031     struct kvm_ppc_pvinfo pvinfo;
1032
1033     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1034         memcpy(buf, pvinfo.hcall, buf_len);
1035         return 0;
1036     }
1037
1038     /*
1039      * Fallback to always fail hypercalls:
1040      *
1041      *     li r3, -1
1042      *     nop
1043      *     nop
1044      *     nop
1045      */
1046
1047     hc[0] = 0x3860ffff;
1048     hc[1] = 0x60000000;
1049     hc[2] = 0x60000000;
1050     hc[3] = 0x60000000;
1051
1052     return 0;
1053 }
1054
1055 void kvmppc_set_papr(PowerPCCPU *cpu)
1056 {
1057     CPUPPCState *env = &cpu->env;
1058     CPUState *cs = CPU(cpu);
1059     struct kvm_enable_cap cap = {};
1060     int ret;
1061
1062     cap.cap = KVM_CAP_PPC_PAPR;
1063     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1064
1065     if (ret) {
1066         cpu_abort(env, "This KVM version does not support PAPR\n");
1067     }
1068 }
1069
1070 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1071 {
1072     CPUPPCState *env = &cpu->env;
1073     CPUState *cs = CPU(cpu);
1074     struct kvm_enable_cap cap = {};
1075     int ret;
1076
1077     cap.cap = KVM_CAP_PPC_EPR;
1078     cap.args[0] = mpic_proxy;
1079     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1080
1081     if (ret && mpic_proxy) {
1082         cpu_abort(env, "This KVM version does not support EPR\n");
1083     }
1084 }
1085
1086 int kvmppc_smt_threads(void)
1087 {
1088     return cap_ppc_smt ? cap_ppc_smt : 1;
1089 }
1090
1091 #ifdef TARGET_PPC64
1092 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1093 {
1094     void *rma;
1095     off_t size;
1096     int fd;
1097     struct kvm_allocate_rma ret;
1098     MemoryRegion *rma_region;
1099
1100     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1101      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1102      *                      not necessary on this hardware
1103      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1104      *
1105      * FIXME: We should allow the user to force contiguous RMA
1106      * allocation in the cap_ppc_rma==1 case.
1107      */
1108     if (cap_ppc_rma < 2) {
1109         return 0;
1110     }
1111
1112     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1113     if (fd < 0) {
1114         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1115                 strerror(errno));
1116         return -1;
1117     }
1118
1119     size = MIN(ret.rma_size, 256ul << 20);
1120
1121     rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1122     if (rma == MAP_FAILED) {
1123         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1124         return -1;
1125     };
1126
1127     rma_region = g_new(MemoryRegion, 1);
1128     memory_region_init_ram_ptr(rma_region, name, size, rma);
1129     vmstate_register_ram_global(rma_region);
1130     memory_region_add_subregion(sysmem, 0, rma_region);
1131
1132     return size;
1133 }
1134
1135 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1136 {
1137     if (cap_ppc_rma >= 2) {
1138         return current_size;
1139     }
1140     return MIN(current_size,
1141                getrampagesize() << (hash_shift - 7));
1142 }
1143 #endif
1144
1145 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1146 {
1147     struct kvm_create_spapr_tce args = {
1148         .liobn = liobn,
1149         .window_size = window_size,
1150     };
1151     long len;
1152     int fd;
1153     void *table;
1154
1155     /* Must set fd to -1 so we don't try to munmap when called for
1156      * destroying the table, which the upper layers -will- do
1157      */
1158     *pfd = -1;
1159     if (!cap_spapr_tce) {
1160         return NULL;
1161     }
1162
1163     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1164     if (fd < 0) {
1165         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1166                 liobn);
1167         return NULL;
1168     }
1169
1170     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(sPAPRTCE);
1171     /* FIXME: round this up to page size */
1172
1173     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1174     if (table == MAP_FAILED) {
1175         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1176                 liobn);
1177         close(fd);
1178         return NULL;
1179     }
1180
1181     *pfd = fd;
1182     return table;
1183 }
1184
1185 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1186 {
1187     long len;
1188
1189     if (fd < 0) {
1190         return -1;
1191     }
1192
1193     len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(sPAPRTCE);
1194     if ((munmap(table, len) < 0) ||
1195         (close(fd) < 0)) {
1196         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1197                 strerror(errno));
1198         /* Leak the table */
1199     }
1200
1201     return 0;
1202 }
1203
1204 int kvmppc_reset_htab(int shift_hint)
1205 {
1206     uint32_t shift = shift_hint;
1207
1208     if (!kvm_enabled()) {
1209         /* Full emulation, tell caller to allocate htab itself */
1210         return 0;
1211     }
1212     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1213         int ret;
1214         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1215         if (ret == -ENOTTY) {
1216             /* At least some versions of PR KVM advertise the
1217              * capability, but don't implement the ioctl().  Oops.
1218              * Return 0 so that we allocate the htab in qemu, as is
1219              * correct for PR. */
1220             return 0;
1221         } else if (ret < 0) {
1222             return ret;
1223         }
1224         return shift;
1225     }
1226
1227     /* We have a kernel that predates the htab reset calls.  For PR
1228      * KVM, we need to allocate the htab ourselves, for an HV KVM of
1229      * this era, it has allocated a 16MB fixed size hash table
1230      * already.  Kernels of this era have the GET_PVINFO capability
1231      * only on PR, so we use this hack to determine the right
1232      * answer */
1233     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1234         /* PR - tell caller to allocate htab */
1235         return 0;
1236     } else {
1237         /* HV - assume 16MB kernel allocated htab */
1238         return 24;
1239     }
1240 }
1241
1242 static inline uint32_t mfpvr(void)
1243 {
1244     uint32_t pvr;
1245
1246     asm ("mfpvr %0"
1247          : "=r"(pvr));
1248     return pvr;
1249 }
1250
/* Set the bits in flags within *word when on is true, clear them otherwise. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1259
1260 static void kvmppc_host_cpu_initfn(Object *obj)
1261 {
1262     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(obj);
1263
1264     assert(kvm_enabled());
1265
1266     if (pcc->info->pvr != mfpvr()) {
1267         fprintf(stderr, "Your host CPU is unsupported.\n"
1268                 "Please choose a supported model instead, see -cpu ?.\n");
1269         exit(1);
1270     }
1271 }
1272
1273 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1274 {
1275     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1276     uint32_t host_pvr = mfpvr();
1277     PowerPCCPUClass *pvr_pcc;
1278     ppc_def_t *spec;
1279     uint32_t vmx = kvmppc_get_vmx();
1280     uint32_t dfp = kvmppc_get_dfp();
1281
1282     spec = g_malloc0(sizeof(*spec));
1283
1284     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1285     if (pvr_pcc != NULL) {
1286         memcpy(spec, pvr_pcc->info, sizeof(*spec));
1287     }
1288     pcc->info = spec;
1289     /* Override the display name for -cpu ? and QMP */
1290     pcc->info->name = "host";
1291
1292     /* Now fix up the spec with information we can query from the host */
1293
1294     if (vmx != -1) {
1295         /* Only override when we know what the host supports */
1296         alter_insns(&spec->insns_flags, PPC_ALTIVEC, vmx > 0);
1297         alter_insns(&spec->insns_flags2, PPC2_VSX, vmx > 1);
1298     }
1299     if (dfp != -1) {
1300         /* Only override when we know what the host supports */
1301         alter_insns(&spec->insns_flags2, PPC2_DFP, dfp);
1302     }
1303 }
1304
1305 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1306 {
1307     CPUState *cs = CPU(cpu);
1308     int smt;
1309
1310     /* Adjust cpu index for SMT */
1311     smt = kvmppc_smt_threads();
1312     cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1313         + (cs->cpu_index % smp_threads);
1314
1315     return 0;
1316 }
1317
1318
1319 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1320 {
1321     return true;
1322 }
1323
1324 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1325 {
1326     return 1;
1327 }
1328
/* Always returns 1; neither argument is used. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
1333
1334 static const TypeInfo kvm_host_cpu_type_info = {
1335     .name = TYPE_HOST_POWERPC_CPU,
1336     .parent = TYPE_POWERPC_CPU,
1337     .instance_init = kvmppc_host_cpu_initfn,
1338     .class_init = kvmppc_host_cpu_class_init,
1339 };
1340
1341 static void kvm_ppc_register_types(void)
1342 {
1343     type_register_static(&kvm_host_cpu_type_info);
1344 }
1345
1346 type_init(kvm_ppc_register_types)