/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
/* #define DEBUG_KVM */

/* Debug tracing: compiled to a no-op unless DEBUG_KVM is defined above.
 * (The extraction had dropped the #ifdef guard, leaving DPRINTF defined
 * twice, which is a preprocessor redefinition error.) */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
58 static int cap_interrupt_unset
= false;
59 static int cap_interrupt_level
= false;
60 static int cap_segstate
;
61 static int cap_booke_sregs
;
62 static int cap_ppc_smt
;
63 static int cap_ppc_rma
;
64 static int cap_spapr_tce
;
65 static int cap_spapr_multitce
;
66 static int cap_spapr_vfio
;
68 static int cap_one_reg
;
70 static int cap_ppc_watchdog
;
72 static int cap_htab_fd
;
73 static int cap_fixup_hcalls
;
75 /* XXX We have a race condition where we actually have a level triggered
76 * interrupt, but the infrastructure can't expose that yet, so the guest
77 * takes but ignores it, goes to sleep and never gets notified that there's
78 * still an interrupt pending.
80 * As a quick workaround, let's just wake up again 20 ms after we injected
81 * an interrupt. That way we can assure that we're always reinjecting
82 * interrupts in case the guest swallowed them.
84 static QEMUTimer
*idle_timer
;
86 static void kvm_kick_cpu(void *opaque
)
88 PowerPCCPU
*cpu
= opaque
;
90 qemu_cpu_kick(CPU(cpu
));
93 static int kvm_ppc_register_host_cpu_type(void);
95 int kvm_arch_init(KVMState
*s
)
97 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
98 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
99 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
100 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
101 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
102 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
103 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
104 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
105 cap_spapr_vfio
= false;
106 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
107 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
108 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
109 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
110 /* Note: we don't set cap_papr here, because this capability is
111 * only activated after this by kvmppc_set_papr() */
112 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
113 cap_fixup_hcalls
= kvm_check_extension(s
, KVM_CAP_PPC_FIXUP_HCALL
);
115 if (!cap_interrupt_level
) {
116 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
117 "VM to stall at times!\n");
120 kvm_ppc_register_host_cpu_type();
125 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
127 CPUPPCState
*cenv
= &cpu
->env
;
128 CPUState
*cs
= CPU(cpu
);
129 struct kvm_sregs sregs
;
132 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
133 /* What we're really trying to say is "if we're on BookE, we use
134 the native PVR for now". This is the only sane way to check
135 it though, so we potentially confuse users that they can run
136 BookE guests on BookS. Let's hope nobody dares enough :) */
140 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
145 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
150 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
151 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
154 /* Set up a shared TLB array with KVM */
155 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
157 CPUPPCState
*env
= &cpu
->env
;
158 CPUState
*cs
= CPU(cpu
);
159 struct kvm_book3e_206_tlb_params params
= {};
160 struct kvm_config_tlb cfg
= {};
161 unsigned int entries
= 0;
164 if (!kvm_enabled() ||
165 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
169 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
171 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
172 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
173 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
174 entries
+= params
.tlb_sizes
[i
];
177 assert(entries
== env
->nb_tlb
);
178 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
180 env
->tlb_dirty
= true;
182 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
183 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
184 cfg
.params
= (uintptr_t)¶ms
;
185 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
187 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
189 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
190 __func__
, strerror(-ret
));
194 env
->kvm_sw_tlb
= true;
199 #if defined(TARGET_PPC64)
200 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
201 struct kvm_ppc_smmu_info
*info
)
203 CPUPPCState
*env
= &cpu
->env
;
204 CPUState
*cs
= CPU(cpu
);
206 memset(info
, 0, sizeof(*info
));
208 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
209 * need to "guess" what the supported page sizes are.
211 * For that to work we make a few assumptions:
213 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
214 * KVM which only supports 4K and 16M pages, but supports them
215 * regardless of the backing store characteritics. We also don't
216 * support 1T segments.
218 * This is safe as if HV KVM ever supports that capability or PR
219 * KVM grows supports for more page/segment sizes, those versions
220 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
221 * will not hit this fallback
223 * - Else we are running HV KVM. This means we only support page
224 * sizes that fit in the backing store. Additionally we only
225 * advertize 64K pages if the processor is ARCH 2.06 and we assume
226 * P7 encodings for the SLB and hash table. Here too, we assume
227 * support for any newer processor will mean a kernel that
228 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
231 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
236 /* Standard 4k base page size segment */
237 info
->sps
[0].page_shift
= 12;
238 info
->sps
[0].slb_enc
= 0;
239 info
->sps
[0].enc
[0].page_shift
= 12;
240 info
->sps
[0].enc
[0].pte_enc
= 0;
242 /* Standard 16M large page size segment */
243 info
->sps
[1].page_shift
= 24;
244 info
->sps
[1].slb_enc
= SLB_VSID_L
;
245 info
->sps
[1].enc
[0].page_shift
= 24;
246 info
->sps
[1].enc
[0].pte_enc
= 0;
250 /* HV KVM has backing store size restrictions */
251 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
253 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
254 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
257 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
263 /* Standard 4k base page size segment */
264 info
->sps
[i
].page_shift
= 12;
265 info
->sps
[i
].slb_enc
= 0;
266 info
->sps
[i
].enc
[0].page_shift
= 12;
267 info
->sps
[i
].enc
[0].pte_enc
= 0;
270 /* 64K on MMU 2.06 */
271 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
272 info
->sps
[i
].page_shift
= 16;
273 info
->sps
[i
].slb_enc
= 0x110;
274 info
->sps
[i
].enc
[0].page_shift
= 16;
275 info
->sps
[i
].enc
[0].pte_enc
= 1;
279 /* Standard 16M large page size segment */
280 info
->sps
[i
].page_shift
= 24;
281 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
282 info
->sps
[i
].enc
[0].page_shift
= 24;
283 info
->sps
[i
].enc
[0].pte_enc
= 0;
287 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
289 CPUState
*cs
= CPU(cpu
);
292 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
293 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
299 kvm_get_fallback_smmu_info(cpu
, info
);
302 static long getrampagesize(void)
308 /* guest RAM is backed by normal anonymous pages */
309 return getpagesize();
313 ret
= statfs(mem_path
, &fs
);
314 } while (ret
!= 0 && errno
== EINTR
);
317 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
322 #define HUGETLBFS_MAGIC 0x958458f6
324 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
325 /* Explicit mempath, but it's ordinary pages */
326 return getpagesize();
329 /* It's hugepage, return the huge page size */
333 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
335 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
339 return (1ul << shift
) <= rampgsize
;
342 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
344 static struct kvm_ppc_smmu_info smmu_info
;
345 static bool has_smmu_info
;
346 CPUPPCState
*env
= &cpu
->env
;
350 /* We only handle page sizes for 64-bit server guests for now */
351 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
355 /* Collect MMU info from kernel if not already */
356 if (!has_smmu_info
) {
357 kvm_get_smmu_info(cpu
, &smmu_info
);
358 has_smmu_info
= true;
361 rampagesize
= getrampagesize();
363 /* Convert to QEMU form */
364 memset(&env
->sps
, 0, sizeof(env
->sps
));
367 * XXX This loop should be an entry wide AND of the capabilities that
368 * the selected CPU has with the capabilities that KVM supports.
370 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
371 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
372 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
374 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
378 qsps
->page_shift
= ksps
->page_shift
;
379 qsps
->slb_enc
= ksps
->slb_enc
;
380 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
381 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
382 ksps
->enc
[jk
].page_shift
)) {
385 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
386 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
387 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
391 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
395 env
->slb_nr
= smmu_info
.slb_size
;
396 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
397 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
400 #else /* defined (TARGET_PPC64) */
402 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
406 #endif /* !defined (TARGET_PPC64) */
408 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
410 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
413 int kvm_arch_init_vcpu(CPUState
*cs
)
415 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
416 CPUPPCState
*cenv
= &cpu
->env
;
419 /* Gather server mmu info from KVM and update the CPU state */
420 kvm_fixup_page_sizes(cpu
);
422 /* Synchronize sregs with kvm */
423 ret
= kvm_arch_sync_sregs(cpu
);
428 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
430 /* Some targets support access to KVM's guest TLB. */
431 switch (cenv
->mmu_model
) {
432 case POWERPC_MMU_BOOKE206
:
433 ret
= kvm_booke206_tlb_init(cpu
);
442 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
444 CPUPPCState
*env
= &cpu
->env
;
445 CPUState
*cs
= CPU(cpu
);
446 struct kvm_dirty_tlb dirty_tlb
;
447 unsigned char *bitmap
;
450 if (!env
->kvm_sw_tlb
) {
454 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
455 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
457 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
458 dirty_tlb
.num_dirty
= env
->nb_tlb
;
460 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
462 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
463 __func__
, strerror(-ret
));
469 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
471 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
472 CPUPPCState
*env
= &cpu
->env
;
477 struct kvm_one_reg reg
= {
479 .addr
= (uintptr_t) &val
,
483 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
485 trace_kvm_failed_spr_get(spr
, strerror(errno
));
487 switch (id
& KVM_REG_SIZE_MASK
) {
488 case KVM_REG_SIZE_U32
:
489 env
->spr
[spr
] = val
.u32
;
492 case KVM_REG_SIZE_U64
:
493 env
->spr
[spr
] = val
.u64
;
497 /* Don't handle this size yet */
503 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
505 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
506 CPUPPCState
*env
= &cpu
->env
;
511 struct kvm_one_reg reg
= {
513 .addr
= (uintptr_t) &val
,
517 switch (id
& KVM_REG_SIZE_MASK
) {
518 case KVM_REG_SIZE_U32
:
519 val
.u32
= env
->spr
[spr
];
522 case KVM_REG_SIZE_U64
:
523 val
.u64
= env
->spr
[spr
];
527 /* Don't handle this size yet */
531 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
533 trace_kvm_failed_spr_set(spr
, strerror(errno
));
537 static int kvm_put_fp(CPUState
*cs
)
539 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
540 CPUPPCState
*env
= &cpu
->env
;
541 struct kvm_one_reg reg
;
545 if (env
->insns_flags
& PPC_FLOAT
) {
546 uint64_t fpscr
= env
->fpscr
;
547 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
549 reg
.id
= KVM_REG_PPC_FPSCR
;
550 reg
.addr
= (uintptr_t)&fpscr
;
551 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
553 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
557 for (i
= 0; i
< 32; i
++) {
560 vsr
[0] = float64_val(env
->fpr
[i
]);
561 vsr
[1] = env
->vsr
[i
];
562 reg
.addr
= (uintptr_t) &vsr
;
563 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
565 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
567 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
574 if (env
->insns_flags
& PPC_ALTIVEC
) {
575 reg
.id
= KVM_REG_PPC_VSCR
;
576 reg
.addr
= (uintptr_t)&env
->vscr
;
577 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
579 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
583 for (i
= 0; i
< 32; i
++) {
584 reg
.id
= KVM_REG_PPC_VR(i
);
585 reg
.addr
= (uintptr_t)&env
->avr
[i
];
586 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
588 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
597 static int kvm_get_fp(CPUState
*cs
)
599 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
600 CPUPPCState
*env
= &cpu
->env
;
601 struct kvm_one_reg reg
;
605 if (env
->insns_flags
& PPC_FLOAT
) {
607 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
609 reg
.id
= KVM_REG_PPC_FPSCR
;
610 reg
.addr
= (uintptr_t)&fpscr
;
611 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
613 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
619 for (i
= 0; i
< 32; i
++) {
622 reg
.addr
= (uintptr_t) &vsr
;
623 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
625 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
627 DPRINTF("Unable to get %s%d from KVM: %s\n",
628 vsx
? "VSR" : "FPR", i
, strerror(errno
));
631 env
->fpr
[i
] = vsr
[0];
633 env
->vsr
[i
] = vsr
[1];
639 if (env
->insns_flags
& PPC_ALTIVEC
) {
640 reg
.id
= KVM_REG_PPC_VSCR
;
641 reg
.addr
= (uintptr_t)&env
->vscr
;
642 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
644 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
648 for (i
= 0; i
< 32; i
++) {
649 reg
.id
= KVM_REG_PPC_VR(i
);
650 reg
.addr
= (uintptr_t)&env
->avr
[i
];
651 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
653 DPRINTF("Unable to get VR%d from KVM: %s\n",
663 #if defined(TARGET_PPC64)
664 static int kvm_get_vpa(CPUState
*cs
)
666 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
667 CPUPPCState
*env
= &cpu
->env
;
668 struct kvm_one_reg reg
;
671 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
672 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
673 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
675 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
679 assert((uintptr_t)&env
->slb_shadow_size
680 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
681 reg
.id
= KVM_REG_PPC_VPA_SLB
;
682 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
683 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
685 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
690 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
691 reg
.id
= KVM_REG_PPC_VPA_DTL
;
692 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
693 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
695 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
703 static int kvm_put_vpa(CPUState
*cs
)
705 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
706 CPUPPCState
*env
= &cpu
->env
;
707 struct kvm_one_reg reg
;
710 /* SLB shadow or DTL can't be registered unless a master VPA is
711 * registered. That means when restoring state, if a VPA *is*
712 * registered, we need to set that up first. If not, we need to
713 * deregister the others before deregistering the master VPA */
714 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
717 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
718 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
719 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
721 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
726 assert((uintptr_t)&env
->slb_shadow_size
727 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
728 reg
.id
= KVM_REG_PPC_VPA_SLB
;
729 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
730 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
732 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
736 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
737 reg
.id
= KVM_REG_PPC_VPA_DTL
;
738 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
739 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
741 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
746 if (!env
->vpa_addr
) {
747 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
748 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
749 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
751 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
758 #endif /* TARGET_PPC64 */
760 int kvm_arch_put_registers(CPUState
*cs
, int level
)
762 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
763 CPUPPCState
*env
= &cpu
->env
;
764 struct kvm_regs regs
;
768 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
775 regs
.xer
= cpu_read_xer(env
);
779 regs
.srr0
= env
->spr
[SPR_SRR0
];
780 regs
.srr1
= env
->spr
[SPR_SRR1
];
782 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
783 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
784 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
785 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
786 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
787 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
788 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
789 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
791 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
793 for (i
= 0;i
< 32; i
++)
794 regs
.gpr
[i
] = env
->gpr
[i
];
797 for (i
= 0; i
< 8; i
++) {
798 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
801 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
807 if (env
->tlb_dirty
) {
809 env
->tlb_dirty
= false;
812 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
813 struct kvm_sregs sregs
;
815 sregs
.pvr
= env
->spr
[SPR_PVR
];
817 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
821 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
822 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
823 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
824 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
826 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
831 for (i
= 0; i
< 16; i
++) {
832 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
836 for (i
= 0; i
< 8; i
++) {
837 /* Beware. We have to swap upper and lower bits here */
838 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
840 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
844 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
850 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
851 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
857 /* We deliberately ignore errors here, for kernels which have
858 * the ONE_REG calls, but don't support the specific
859 * registers, there's a reasonable chance things will still
860 * work, at least until we try to migrate. */
861 for (i
= 0; i
< 1024; i
++) {
862 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
865 kvm_put_one_spr(cs
, id
, i
);
871 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
872 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
874 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
875 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
877 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
878 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
879 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
880 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
881 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
882 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
883 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
884 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
885 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
886 kvm_set_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
890 if (kvm_put_vpa(cs
) < 0) {
891 DPRINTF("Warning: Unable to set VPA information to KVM\n");
895 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
896 #endif /* TARGET_PPC64 */
902 int kvm_arch_get_registers(CPUState
*cs
)
904 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
905 CPUPPCState
*env
= &cpu
->env
;
906 struct kvm_regs regs
;
907 struct kvm_sregs sregs
;
911 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
916 for (i
= 7; i
>= 0; i
--) {
917 env
->crf
[i
] = cr
& 15;
923 cpu_write_xer(env
, regs
.xer
);
927 env
->spr
[SPR_SRR0
] = regs
.srr0
;
928 env
->spr
[SPR_SRR1
] = regs
.srr1
;
930 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
931 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
932 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
933 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
934 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
935 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
936 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
937 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
939 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
941 for (i
= 0;i
< 32; i
++)
942 env
->gpr
[i
] = regs
.gpr
[i
];
946 if (cap_booke_sregs
) {
947 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
952 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
953 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
954 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
955 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
956 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
957 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
958 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
959 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
960 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
961 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
962 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
963 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
966 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
967 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
968 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
969 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
970 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
971 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
974 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
975 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
978 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
979 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
982 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
983 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
984 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
985 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
986 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
987 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
988 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
989 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
990 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
991 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
992 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
993 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
994 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
995 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
996 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
997 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
998 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
1000 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
1001 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
1002 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
1003 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
1006 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
1007 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
1010 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
1011 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
1012 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
1016 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
1017 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
1018 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
1019 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
1020 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
1021 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
1022 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1023 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1024 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1025 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1026 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1029 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1030 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1033 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1034 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1035 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1038 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1039 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1040 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1041 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1043 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1044 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1045 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1051 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1056 if (!env
->external_htab
) {
1057 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1063 * The packed SLB array we get from KVM_GET_SREGS only contains
1064 * information about valid entries. So we flush our internal
1065 * copy to get rid of stale ones, then put all valid SLB entries
1068 memset(env
->slb
, 0, sizeof(env
->slb
));
1069 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1070 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1071 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1073 * Only restore valid entries
1075 if (rb
& SLB_ESID_V
) {
1076 ppc_store_slb(env
, rb
, rs
);
1082 for (i
= 0; i
< 16; i
++) {
1083 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1087 for (i
= 0; i
< 8; i
++) {
1088 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1089 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1090 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1091 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1096 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1102 /* We deliberately ignore errors here, for kernels which have
1103 * the ONE_REG calls, but don't support the specific
1104 * registers, there's a reasonable chance things will still
1105 * work, at least until we try to migrate. */
1106 for (i
= 0; i
< 1024; i
++) {
1107 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1110 kvm_get_one_spr(cs
, id
, i
);
1116 for (i
= 0; i
< ARRAY_SIZE(env
->tm_gpr
); i
++) {
1117 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_GPR(i
), &env
->tm_gpr
[i
]);
1119 for (i
= 0; i
< ARRAY_SIZE(env
->tm_vsr
); i
++) {
1120 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSR(i
), &env
->tm_vsr
[i
]);
1122 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CR
, &env
->tm_cr
);
1123 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_LR
, &env
->tm_lr
);
1124 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_CTR
, &env
->tm_ctr
);
1125 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_FPSCR
, &env
->tm_fpscr
);
1126 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_AMR
, &env
->tm_amr
);
1127 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_PPR
, &env
->tm_ppr
);
1128 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VRSAVE
, &env
->tm_vrsave
);
1129 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_VSCR
, &env
->tm_vscr
);
1130 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_DSCR
, &env
->tm_dscr
);
1131 kvm_get_one_reg(cs
, KVM_REG_PPC_TM_TAR
, &env
->tm_tar
);
1135 if (kvm_get_vpa(cs
) < 0) {
1136 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1140 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1147 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1149 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1151 if (irq
!= PPC_INTERRUPT_EXT
) {
1155 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1159 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1164 #if defined(TARGET_PPCEMB)
1165 #define PPC_INPUT_INT PPC40x_INPUT_INT
1166 #elif defined(TARGET_PPC64)
1167 #define PPC_INPUT_INT PPC970_INPUT_INT
1169 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1172 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1174 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1175 CPUPPCState
*env
= &cpu
->env
;
1179 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1180 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1181 if (!cap_interrupt_level
&&
1182 run
->ready_for_interrupt_injection
&&
1183 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1184 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1186 /* For now KVM disregards the 'irq' argument. However, in the
1187 * future KVM could cache it in-kernel to avoid a heavyweight exit
1188 * when reading the UIC.
1190 irq
= KVM_INTERRUPT_SET
;
1192 DPRINTF("injected interrupt %d\n", irq
);
1193 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1195 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1198 /* Always wake up soon in case the interrupt was level based */
1199 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1200 (get_ticks_per_sec() / 50));
1203 /* We don't know if there are more interrupts pending after this. However,
1204 * the guest will return to userspace in the course of handling this one
1205 * anyways, so we will get a chance to deliver the rest. */
1208 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1212 int kvm_arch_process_async_events(CPUState
*cs
)
1217 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1219 CPUState
*cs
= CPU(cpu
);
1220 CPUPPCState
*env
= &cpu
->env
;
1222 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1224 cs
->exception_index
= EXCP_HLT
;
1230 /* map dcr access to existing qemu dcr emulation */
1231 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1233 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1234 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1239 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1241 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1242 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
/*
 * Main KVM exit dispatcher for PPC: decode run->exit_reason and emulate the
 * operation the guest attempted (DCR access, halt, PAPR hypercall, EPR read,
 * watchdog expiry).
 * NOTE(review): the extract elides the case labels for DCR/HLT, the break
 * statements, the default's ret value and the final return.
 */
1247 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1249 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1250 CPUPPCState
*env
= &cpu
->env
;
1253 switch (run
->exit_reason
) {
/* DCR exit: kernel passes the DCR number and data in run->dcr. */
1255 if (run
->dcr
.is_write
) {
1256 DPRINTF("handle dcr write\n");
1257 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1259 DPRINTF("handle dcr read\n");
1260 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
/* Guest executed a halting instruction. */
1264 DPRINTF("handle halt\n");
1265 ret
= kvmppc_handle_halt(cpu
);
/* sPAPR hypercalls only exist on 64-bit PPC. */
1267 #if defined(TARGET_PPC64)
1268 case KVM_EXIT_PAPR_HCALL
:
1269 DPRINTF("handle PAPR hypercall\n");
/* Result goes straight back to the kernel via the shared run page. */
1270 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1272 run
->papr_hcall
.args
);
/* EPR: fetch the interrupt vector by reading the MPIC IACK address. */
1277 DPRINTF("handle epr\n");
1278 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1281 case KVM_EXIT_WATCHDOG
:
1282 DPRINTF("handle watchdog expiry\n");
1283 watchdog_perform_action();
/* Unknown exit reason: report and (elided) fail the exit. */
1288 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1296 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1298 CPUState
*cs
= CPU(cpu
);
1299 uint32_t bits
= tsr_bits
;
1300 struct kvm_one_reg reg
= {
1301 .id
= KVM_REG_PPC_OR_TSR
,
1302 .addr
= (uintptr_t) &bits
,
1305 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1308 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1311 CPUState
*cs
= CPU(cpu
);
1312 uint32_t bits
= tsr_bits
;
1313 struct kvm_one_reg reg
= {
1314 .id
= KVM_REG_PPC_CLEAR_TSR
,
1315 .addr
= (uintptr_t) &bits
,
1318 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1321 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1323 CPUState
*cs
= CPU(cpu
);
1324 CPUPPCState
*env
= &cpu
->env
;
1325 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1327 struct kvm_one_reg reg
= {
1328 .id
= KVM_REG_PPC_TCR
,
1329 .addr
= (uintptr_t) &tcr
,
1332 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
/*
 * Enable the in-kernel BookE watchdog for this vCPU via
 * KVM_CAP_PPC_BOOKE_WATCHDOG.
 * NOTE(review): the extract elides the guards' return values and the final
 * return; only the visible flow is documented.
 */
1335 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1337 CPUState
*cs
= CPU(cpu
);
/* Nothing to do (elided early return) when not running under KVM. */
1340 if (!kvm_enabled()) {
1344 if (!cap_ppc_watchdog
) {
/* NOTE(review): warning lacks a trailing newline -- verify intent. */
1345 printf("warning: KVM does not support watchdog");
1349 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1351 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1352 __func__
, strerror(-ret
));
/*
 * Scan /proc/cpuinfo for a line starting with @field and copy that whole line
 * into @value (at most @len bytes, via pstrcpy).
 * NOTE(review): the extract elides the read loop structure, fclose and the
 * return values.
 */
1359 static int read_cpuinfo(const char *field
, char *value
, int len
)
1363 int field_len
= strlen(field
);
1366 f
= fopen("/proc/cpuinfo", "r");
/* EOF / read error terminates the (elided) scan loop. */
1372 if(!fgets(line
, sizeof(line
), f
)) {
/* Prefix match against the requested field name. */
1375 if (!strncmp(line
, field
, field_len
)) {
1376 pstrcpy(value
, len
, line
);
/*
 * Host timebase frequency: parse the "timebase" line of /proc/cpuinfo,
 * falling back to get_ticks_per_sec() when it cannot be read/parsed.
 */
1387 uint32_t kvmppc_get_tbfreq(void)
1391 uint32_t retval
= get_ticks_per_sec();
1393 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
/* Value follows the ':' separator on the cpuinfo line. */
1397 if (!(ns
= strchr(line
, ':'))) {
1407 /* Try to find a device tree node for a CPU with clock-frequency property */
/*
 * Walk /proc/device-tree/cpus/ and leave in @buf the path of the first CPU
 * node that has a clock-frequency property.
 * NOTE(review): the extract elides buf initialization, fclose/closedir and
 * the return values.
 */
1408 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1410 struct dirent
*dirp
;
1413 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1414 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1419 while ((dirp
= readdir(dp
)) != NULL
) {
/* Probe for <node>/clock-frequency; a successful fopen marks a CPU node. */
1421 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1423 f
= fopen(buf
, "r");
/* Keep the bare node path in buf for the caller. */
1425 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
/* Empty buf means no suitable CPU node was found. */
1432 if (buf
[0] == '\0') {
1433 printf("Unknown host!\n");
1440 /* Read a CPU node property from the host device tree that's a single
1441 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1442 * (can't find or open the property, or doesn't understand the
/* NOTE(review): the extract elides the local declarations, fopen error
 * handling, fclose and the length switch/fall-through structure. */
1444 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1454 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
/* NOTE(review): strncat's size argument should normally be
 * sizeof(buf) - strlen(buf) - 1, since strncat appends a NUL beyond the
 * limit -- this looks like an off-by-one; confirm against the full file. */
1458 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
1459 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
1461 f
= fopen(buf
, "rb");
/* Device-tree properties are raw big-endian integers. */
1466 len
= fread(&u
, 1, sizeof(u
), f
);
1470 /* property is a 32-bit quantity */
1471 return be32_to_cpu(u
.v32
);
1473 return be64_to_cpu(u
.v64
);
/*
 * Thin accessors for single-integer properties of the host CPU's
 * device-tree node. Each returns 0 if the property cannot be read
 * (see kvmppc_read_int_cpu_dt).
 */

/* Host CPU core clock frequency in Hz. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}

/* "ibm,vmx" property: VMX/VSX capability level of the host CPU. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}

/* "ibm,dfp" property: decimal floating point capability of the host CPU. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
/*
 * Fetch the paravirt info block (hypercall sequence + flags) from KVM.
 * NOTE(review): the extract elides the success/failure return statements.
 */
1494 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1496 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1497 CPUState
*cs
= CPU(cpu
);
/* Only meaningful when the kernel advertises KVM_CAP_PPC_GET_PVINFO. */
1499 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1500 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
/*
 * Whether the hypervisor supports the EV_IDLE paravirt idle hcall.
 * NOTE(review): returns elided in this extract.
 */
1507 int kvmppc_get_hasidle(CPUPPCState
*env
)
1509 struct kvm_ppc_pvinfo pvinfo
;
1511 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1512 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
/*
 * Copy the KVM-provided hypercall instruction sequence into @buf; if KVM has
 * none, synthesize a 4-instruction always-fail sequence that returns -1 in r3
 * regardless of guest endianness.
 * NOTE(review): the extract elides the return statements.
 */
1519 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1521 uint32_t *hc
= (uint32_t*)buf
;
1522 struct kvm_ppc_pvinfo pvinfo
;
/* Preferred path: the kernel supplies the real hypercall sequence. */
1524 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1525 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1530 * Fallback to always fail hypercalls regardless of endianness:
1532 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1534 * b .+8 (becomes nop in wrong endian)
1535 * bswap32(li r3, -1)
/* Encodings chosen so each endianness skips the other's "li r3,-1". */
1538 hc
[0] = cpu_to_be32(0x08000048);
1539 hc
[1] = cpu_to_be32(0x3860ffff);
1540 hc
[2] = cpu_to_be32(0x48000008);
1541 hc
[3] = cpu_to_be32(bswap32(0x3860ffff));
/*
 * Put the vCPU into PAPR (sPAPR guest) mode via KVM_CAP_PPC_PAPR.
 * Aborts the CPU if the kernel does not support PAPR guests.
 * NOTE(review): the extract elides the ret declaration, the error check and
 * the capability-flag update the trailing comment refers to.
 */
1546 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1548 CPUState
*cs
= CPU(cpu
);
1551 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
/* Fatal: a PAPR machine cannot run without kernel PAPR support. */
1553 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1556 /* Update the capability flag so we sync the right information
1561 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
1563 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
/*
 * Enable/disable MPIC proxy (EPR) mode for this vCPU via KVM_CAP_PPC_EPR.
 * NOTE(review): the extract elides the ret declaration and closing braces.
 */
1566 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1568 CPUState
*cs
= CPU(cpu
);
1571 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
/* Only fatal when proxy mode was actually requested; disabling may fail
 * harmlessly. */
1572 if (ret
&& mpic_proxy
) {
1573 cpu_abort(cs
, "This KVM version does not support EPR\n");
1577 int kvmppc_smt_threads(void)
1579 return cap_ppc_smt
? cap_ppc_smt
: 1;
/*
 * Allocate a contiguous Real Mode Area through KVM_ALLOCATE_RMA, mmap it and
 * register it as RAM at guest physical 0 in @sysmem.
 * NOTE(review): the extract elides the fd/size/rma declarations, the early
 * returns and the final return of the RMA size.
 */
1583 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1588 struct kvm_allocate_rma ret
;
1589 MemoryRegion
*rma_region
;
1591 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1592 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1593 * not necessary on this hardware
1594 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1596 * FIXME: We should allow the user to force contiguous RMA
1597 * allocation in the cap_ppc_rma==1 case.
/* Only hardware that *requires* a contiguous RMA goes through this path. */
1599 if (cap_ppc_rma
< 2) {
1603 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1605 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
/* Cap the mapped RMA at 256 MiB. */
1610 size
= MIN(ret
.rma_size
, 256ul << 20);
1612 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1613 if (rma
== MAP_FAILED
) {
1614 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
/* Wrap the mapping in a MemoryRegion and install it at guest address 0. */
1618 rma_region
= g_new(MemoryRegion
, 1);
1619 memory_region_init_ram_ptr(rma_region
, NULL
, name
, size
, rma
);
1620 vmstate_register_ram_global(rma_region
);
1621 memory_region_add_subregion(sysmem
, 0, rma_region
);
/*
 * Clamp the requested RMA size to what the hash table geometry allows:
 * at most 1 << (best_page_shift + hash_shift - 7) bytes, where
 * best_page_shift is the largest host-supported page size not exceeding the
 * backing RAM page size.
 * NOTE(review): the extract elides the loop index declaration, the continue
 * in the empty-entry case and closing braces.
 */
1626 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1628 struct kvm_ppc_smmu_info info
;
1629 long rampagesize
, best_page_shift
;
/* With a kernel-allocated contiguous RMA, the size is already final. */
1632 if (cap_ppc_rma
>= 2) {
1633 return current_size
;
1636 /* Find the largest hardware supported page size that's less than
1637 * or equal to the (logical) backing page size of guest RAM */
1638 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1639 rampagesize
= getrampagesize();
1640 best_page_shift
= 0;
1642 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1643 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
/* A zero page_shift marks an unused table slot. */
1645 if (!sps
->page_shift
) {
1649 if ((sps
->page_shift
> best_page_shift
)
1650 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1651 best_page_shift
= sps
->page_shift
;
1655 return MIN(current_size
,
1656 1ULL << (best_page_shift
+ hash_shift
- 7));
1660 bool kvmppc_spapr_use_multitce(void)
1662 return cap_spapr_multitce
;
/*
 * Create an in-kernel sPAPR TCE (IOMMU) table for @liobn and mmap it into
 * QEMU; on success *pfd holds the table fd and the mapping is returned.
 * NOTE(review): the extract elides the fd/len/table declarations, most error
 * returns and the final return of the mapping.
 */
1665 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
,
1668 struct kvm_create_spapr_tce args
= {
1670 .window_size
= window_size
,
1676 /* Must set fd to -1 so we don't try to munmap when called for
1677 * destroying the table, which the upper layers -will- do
/* Bail out when the kernel lacks TCE support, or lacks the VFIO-capable
 * variant when acceleration for VFIO was requested. */
1680 if (!cap_spapr_tce
|| (vfio_accel
&& !cap_spapr_vfio
)) {
1684 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1686 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
/* One 64-bit TCE entry per IOMMU page in the window. */
1691 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
1692 /* FIXME: round this up to page size */
1694 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1695 if (table
== MAP_FAILED
) {
1696 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
/*
 * Undo kvmppc_create_spapr_tce: unmap the table mapping.
 * NOTE(review): the early return for table == NULL, close(fd) and the
 * return values are elided in this extract.
 */
1706 int kvmppc_remove_spapr_tce(void *table
, int fd
, uint32_t nb_table
)
1714 len
= nb_table
* sizeof(uint64_t);
1715 if ((munmap(table
, len
) < 0) ||
1717 fprintf(stderr
, "KVM: Unexpected error removing TCE table: %s",
1719 /* Leak the table */
1725 int kvmppc_reset_htab(int shift_hint
)
1727 uint32_t shift
= shift_hint
;
1729 if (!kvm_enabled()) {
1730 /* Full emulation, tell caller to allocate htab itself */
1733 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1735 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1736 if (ret
== -ENOTTY
) {
1737 /* At least some versions of PR KVM advertise the
1738 * capability, but don't implement the ioctl(). Oops.
1739 * Return 0 so that we allocate the htab in qemu, as is
1740 * correct for PR. */
1742 } else if (ret
< 0) {
1748 /* We have a kernel that predates the htab reset calls. For PR
1749 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1750 * this era, it has allocated a 16MB fixed size hash table
1751 * already. Kernels of this era have the GET_PVINFO capability
1752 * only on PR, so we use this hack to determine the right
1754 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1755 /* PR - tell caller to allocate htab */
1758 /* HV - assume 16MB kernel allocated htab */
/* Read the host Processor Version Register; body not visible in this
 * extract (presumably inline asm "mfpvr" -- TODO confirm). */
1763 static inline uint32_t mfpvr(void)
/* Set or clear @flags in *word depending on @on; body not visible in this
 * extract. */
1772 static void alter_insns(uint64_t *word
, uint64_t flags
, bool on
)
1781 static void kvmppc_host_cpu_initfn(Object
*obj
)
1783 assert(kvm_enabled());
/*
 * Class init for the "host" CPU type: override the model's instruction
 * flags and cache sizes with values probed from the host device tree.
 * NOTE(review): the extract elides the PVR assignment and the surrounding
 * conditional structure for the vmx/dfp overrides.
 */
1786 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1788 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1789 uint32_t vmx
= kvmppc_get_vmx();
1790 uint32_t dfp
= kvmppc_get_dfp();
1791 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1792 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1794 /* Now fix up the class with information we can query from the host */
1798 /* Only override when we know what the host supports */
/* vmx > 0 means Altivec; vmx > 1 additionally means VSX. */
1799 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1800 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1803 /* Only override when we know what the host supports */
1804 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
/* -1 (i.e. read failure wrapped to unsigned) leaves the model default. */
1807 if (dcache_size
!= -1) {
1808 pcc
->l1_dcache_size
= dcache_size
;
1811 if (icache_size
!= -1) {
1812 pcc
->l1_icache_size
= icache_size
;
/* Capability accessors for flags probed at kvm_arch_init time. */
/* EPR (MPIC proxy) support; body not visible in this extract. */
1816 bool kvmppc_has_cap_epr(void)
/* HTAB fd support; body not visible in this extract. */
1821 bool kvmppc_has_cap_htab_fd(void)
/* Whether the kernel can fix up hypercall instructions for the guest. */
1826 bool kvmppc_has_cap_fixup_hcalls(void)
1828 return cap_fixup_hcalls
;
/*
 * Walk up the QOM class hierarchy from @pcc until the first abstract class,
 * i.e. the CPU "family" class this model belongs to.
 * NOTE(review): braces and any elided assertion between the loop and the
 * return are not visible in this extract.
 */
1831 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
1833 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
1835 while (oc
&& !object_class_is_abstract(oc
)) {
1836 oc
= object_class_get_parent(oc
);
1840 return POWERPC_CPU_CLASS(oc
);
/*
 * Register the "host" CPU QOM type: resolve the host PVR to an existing CPU
 * class, make "host" an alias subclass of it, and also register a generic
 * family-named type for the same class.
 * NOTE(review): the extract elides the dc declaration, the failure return
 * and the final return.
 */
1843 static int kvm_ppc_register_host_cpu_type(void)
1845 TypeInfo type_info
= {
1846 .name
= TYPE_HOST_POWERPC_CPU
,
1847 .instance_init
= kvmppc_host_cpu_initfn
,
1848 .class_init
= kvmppc_host_cpu_class_init
,
1850 uint32_t host_pvr
= mfpvr();
1851 PowerPCCPUClass
*pvr_pcc
;
/* Exact PVR match first, then a masked match as fallback. */
1854 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1855 if (pvr_pcc
== NULL
) {
1856 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
1858 if (pvr_pcc
== NULL
) {
1861 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1862 type_register(&type_info
);
1864 /* Register generic family CPU class for a family */
1865 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
1866 dc
= DEVICE_CLASS(pvr_pcc
);
1867 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
/* Family type is named "<desc>-powerpc-cpu" after the class description. */
1868 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
1869 type_register(&type_info
);
/*
 * Bind an RTAS call name to a token handled in-kernel
 * (KVM_PPC_RTAS_DEFINE_TOKEN).
 * NOTE(review): the struct initializer's .token assignment and the
 * unsupported-capability return are elided in this extract.
 */
1874 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
1876 struct kvm_rtas_token_args args
= {
1880 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
/* NOTE(review): strncpy may leave args.name without a NUL terminator when
 * strlen(function) >= sizeof(args.name) -- confirm the kernel ABI tolerates
 * this, or bound-copy with explicit termination. */
1884 strncpy(args
.name
, function
, sizeof(args
.name
));
1886 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
1889 int kvmppc_get_htab_fd(bool write
)
1891 struct kvm_get_htab_fd s
= {
1892 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
1897 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
1901 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
/*
 * Stream hash-table chunks from the kernel htab @fd into migration stream
 * @f, stopping after @max_ns of wall-clock time or at end of data.
 * Returns 1 when the table is fully read, 0 when more remains (final return
 * visible below); error paths are elided in this extract.
 */
1904 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
1906 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
/* VLA scratch buffer; bufsize is chosen by the migration caller. */
1907 uint8_t buf
[bufsize
];
1911 rc
= read(fd
, buf
, bufsize
);
1913 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
1917 /* Kernel already returns data in BE format for the file */
1918 qemu_put_buffer(f
, buf
, rc
);
/* Loop (structure elided) continues while data flows and the time budget
 * has not been exhausted. */
1922 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
1924 return (rc
== 0) ? 1 : 0;
/*
 * Feed one incoming migration chunk of hash table entries back to the
 * kernel: build a kvm_get_htab_header + @n_valid HPTEs and write it to @fd.
 * NOTE(review): the header .index assignment, the rc declaration and the
 * return statements are elided in this extract.
 */
1927 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
1928 uint16_t n_valid
, uint16_t n_invalid
)
1930 struct kvm_get_htab_header
*buf
;
1931 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
/* alloca: chunk lives on the stack for the duration of this call only. */
1934 buf
= alloca(chunksize
);
1935 /* This is KVM on ppc, so this is all big-endian */
1937 buf
->n_valid
= n_valid
;
1938 buf
->n_invalid
= n_invalid
;
/* HPTE payload from the stream goes straight after the header. */
1940 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
1942 rc
= write(fd
, buf
, chunksize
);
1944 fprintf(stderr
, "Error writing KVM hash table: %s\n",
1948 if (rc
!= chunksize
) {
1949 /* We should never get a short write on a single chunk */
1950 fprintf(stderr
, "Short write, restoring KVM hash table\n");
/*
 * Generic kvm_arch_* hooks. Bodies are not visible in this extract
 * (presumably trivial stubs on PPC -- TODO confirm against the full file).
 */
1956 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
1961 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
1966 int kvm_arch_on_sigbus(int code
, void *addr
)
1971 void kvm_arch_init_irq_routing(KVMState
*s
)
/* Software/hardware breakpoint hooks; PPC bodies not visible here. */
1975 int kvm_arch_insert_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1980 int kvm_arch_remove_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1985 int kvm_arch_insert_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1990 int kvm_arch_remove_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1995 void kvm_arch_remove_all_hw_breakpoints(void)
1999 void kvm_arch_update_guest_debug(CPUState
*cpu
, struct kvm_guest_debug
*dbg
)
/*
 * Buffer layout used when reading one HPTE group through the htab fd:
 * a kvm_get_htab_header immediately followed by the HPTE words.
 */
2003 struct kvm_get_htab_buf
{
2004 struct kvm_get_htab_header header
;
2006 * We require one extra byte for read
/* 2 target_ulong words per PTE, hence HPTES_PER_GROUP * 2, plus the extra
 * slot mentioned above. */
2008 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
/*
 * Read the HPTE group starting at @pte_index from the kernel and return an
 * opaque token (the address of the heap-allocated hpte array) to the caller;
 * released later via kvmppc_hash64_free_pteg.
 * NOTE(review): the htab_fd declaration, error paths and fd close are
 * elided in this extract.
 */
2011 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
2014 struct kvm_get_htab_fd ghf
;
2015 struct kvm_get_htab_buf
*hpte_buf
;
2018 ghf
.start_index
= pte_index
;
2019 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
/* Zeroed so the header fields start in a defined state. */
2024 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
2026 * Read the hpte group
2028 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
/* Token = address of the embedded hpte array (ownership passes to caller). */
2033 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
/*
 * Free a token returned by kvmppc_hash64_read_pteg: recover the containing
 * kvm_get_htab_buf and release it. The container_of member argument and the
 * g_free call are cut off in this extract.
 */
2042 void kvmppc_hash64_free_pteg(uint64_t token
)
2044 struct kvm_get_htab_buf
*htab_buf
;
2046 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2052 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2053 target_ulong pte0
, target_ulong pte1
)
2056 struct kvm_get_htab_fd ghf
;
2057 struct kvm_get_htab_buf hpte_buf
;
2060 ghf
.start_index
= 0; /* Ignored */
2061 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2066 hpte_buf
.header
.n_valid
= 1;
2067 hpte_buf
.header
.n_invalid
= 0;
2068 hpte_buf
.header
.index
= pte_index
;
2069 hpte_buf
.hpte
[0] = pte0
;
2070 hpte_buf
.hpte
[1] = pte1
;
2072 * Write the hpte entry.
2073 * CAUTION: write() has the warn_unused_result attribute. Hence we
2074 * need to check the return value, even though we do nothing.
2076 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {