target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/timer.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "sysemu/cpus.h"
  32 #include "sysemu/device_tree.h"
  33 #include "mmu-hash64.h"
  34
  35 #include "hw/sysbus.h"
  36 #include "hw/ppc/spapr.h"
  37 #include "hw/ppc/spapr_vio.h"
  38 #include "hw/ppc/ppc.h"
  39 #include "sysemu/watchdog.h"
  40 #include "trace.h"
  41
  42 //#define DEBUG_KVM
  43
  44 #ifdef DEBUG_KVM
  45 #define DPRINTF(fmt, ...) \
  46     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  47 #else
  48 #define DPRINTF(fmt, ...) \
  49     do { } while (0)
  50 #endif
  51
  52 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  53
  54 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  55     KVM_CAP_LAST_INFO
  56 };
  57
  58 static int cap_interrupt_unset = false;
  59 static int cap_interrupt_level = false;
  60 static int cap_segstate;
  61 static int cap_booke_sregs;
  62 static int cap_ppc_smt;
  63 static int cap_ppc_rma;
  64 static int cap_spapr_tce;
  65 static int cap_spapr_multitce;
  66 static int cap_hior;
  67 static int cap_one_reg;
  68 static int cap_epr;
  69 static int cap_ppc_watchdog;
  70 static int cap_papr;
  71 static int cap_htab_fd;
  72 static int cap_fixup_hcalls;
  73
  74 /* XXX We have a race condition where we actually have a level triggered
  75  *     interrupt, but the infrastructure can't expose that yet, so the guest
  76  *     takes but ignores it, goes to sleep and never gets notified that there's
  77  *     still an interrupt pending.
  78  *
  79  *     As a quick workaround, let's just wake up again 20 ms after we injected
  80  *     an interrupt. That way we can assure that we're always reinjecting
  81  *     interrupts in case the guest swallowed them.
  82  */
  83 static QEMUTimer *idle_timer;
  84
  85 static void kvm_kick_cpu(void *opaque)
  86 {
  87     PowerPCCPU *cpu = opaque;
  88
  89     qemu_cpu_kick(CPU(cpu));
  90 }
  91
  92 static int kvm_ppc_register_host_cpu_type(void);
  93
  94 int kvm_arch_init(KVMState *s)
  95 {
  96     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
  97     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
  98     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
  99     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 100     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 101     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 102     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 103     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 104     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 105     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 106     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 107     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 108     /* Note: we don't set cap_papr here, because this capability is
 109      * only activated after this by kvmppc_set_papr() */
 110     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 111     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 112
 113     if (!cap_interrupt_level) {
 114         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 115                         "VM to stall at times!\n");
 116     }
 117
 118     kvm_ppc_register_host_cpu_type();
 119
 120     return 0;
 121 }
 122
 123 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 124 {
 125     CPUPPCState *cenv = &cpu->env;
 126     CPUState *cs = CPU(cpu);
 127     struct kvm_sregs sregs;
 128     int ret;
 129
 130     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 131         /* What we're really trying to say is "if we're on BookE, we use
 132            the native PVR for now". This is the only sane way to check
 133            it though, so we potentially confuse users that they can run
 134            BookE guests on BookS. Let's hope nobody dares enough :) */
 135         return 0;
 136     } else {
 137         if (!cap_segstate) {
 138             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 139             return -ENOSYS;
 140         }
 141     }
 142
 143     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 144     if (ret) {
 145         return ret;
 146     }
 147
 148     sregs.pvr = cenv->spr[SPR_PVR];
 149     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 150 }
 151
 152 /* Set up a shared TLB array with KVM */
 153 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 154 {
 155     CPUPPCState *env = &cpu->env;
 156     CPUState *cs = CPU(cpu);
 157     struct kvm_book3e_206_tlb_params params = {};
 158     struct kvm_config_tlb cfg = {};
 159     unsigned int entries = 0;
 160     int ret, i;
 161
 162     if (!kvm_enabled() ||
 163         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 164         return 0;
 165     }
 166
 167     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 168
 169     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 170         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 171         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 172         entries += params.tlb_sizes[i];
 173     }
 174
 175     assert(entries == env->nb_tlb);
 176     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 177
 178     env->tlb_dirty = true;
 179
 180     cfg.array = (uintptr_t)env->tlb.tlbm;
 181     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 182     cfg.params = (uintptr_t)&params;
 183     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 184
 185     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 186     if (ret < 0) {
 187         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 188                 __func__, strerror(-ret));
 189         return ret;
 190     }
 191
 192     env->kvm_sw_tlb = true;
 193     return 0;
 194 }
 195
 196
 197 #if defined(TARGET_PPC64)
 198 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 199                                        struct kvm_ppc_smmu_info *info)
 200 {
 201     CPUPPCState *env = &cpu->env;
 202     CPUState *cs = CPU(cpu);
 203
 204     memset(info, 0, sizeof(*info));
 205
 206     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 207      * need to "guess" what the supported page sizes are.
 208      *
 209      * For that to work we make a few assumptions:
 210      *
 211      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 212      *   KVM which only supports 4K and 16M pages, but supports them
 213      *   regardless of the backing store characteritics. We also don't
 214      *   support 1T segments.
 215      *
 216      *   This is safe as if HV KVM ever supports that capability or PR
 217      *   KVM grows supports for more page/segment sizes, those versions
 218      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 219      *   will not hit this fallback
 220      *
 221      * - Else we are running HV KVM. This means we only support page
 222      *   sizes that fit in the backing store. Additionally we only
 223      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 224      *   P7 encodings for the SLB and hash table. Here too, we assume
 225      *   support for any newer processor will mean a kernel that
 226      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 227      *   this fallback.
 228      */
 229     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 230         /* No flags */
 231         info->flags = 0;
 232         info->slb_size = 64;
 233
 234         /* Standard 4k base page size segment */
 235         info->sps[0].page_shift = 12;
 236         info->sps[0].slb_enc = 0;
 237         info->sps[0].enc[0].page_shift = 12;
 238         info->sps[0].enc[0].pte_enc = 0;
 239
 240         /* Standard 16M large page size segment */
 241         info->sps[1].page_shift = 24;
 242         info->sps[1].slb_enc = SLB_VSID_L;
 243         info->sps[1].enc[0].page_shift = 24;
 244         info->sps[1].enc[0].pte_enc = 0;
 245     } else {
 246         int i = 0;
 247
 248         /* HV KVM has backing store size restrictions */
 249         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 250
 251         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 252             info->flags |= KVM_PPC_1T_SEGMENTS;
 253         }
 254
 255         if (env->mmu_model == POWERPC_MMU_2_06) {
 256             info->slb_size = 32;
 257         } else {
 258             info->slb_size = 64;
 259         }
 260
 261         /* Standard 4k base page size segment */
 262         info->sps[i].page_shift = 12;
 263         info->sps[i].slb_enc = 0;
 264         info->sps[i].enc[0].page_shift = 12;
 265         info->sps[i].enc[0].pte_enc = 0;
 266         i++;
 267
 268         /* 64K on MMU 2.06 */
 269         if (env->mmu_model == POWERPC_MMU_2_06) {
 270             info->sps[i].page_shift = 16;
 271             info->sps[i].slb_enc = 0x110;
 272             info->sps[i].enc[0].page_shift = 16;
 273             info->sps[i].enc[0].pte_enc = 1;
 274             i++;
 275         }
 276
 277         /* Standard 16M large page size segment */
 278         info->sps[i].page_shift = 24;
 279         info->sps[i].slb_enc = SLB_VSID_L;
 280         info->sps[i].enc[0].page_shift = 24;
 281         info->sps[i].enc[0].pte_enc = 0;
 282     }
 283 }
 284
 285 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 286 {
 287     CPUState *cs = CPU(cpu);
 288     int ret;
 289
 290     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 291         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 292         if (ret == 0) {
 293             return;
 294         }
 295     }
 296
 297     kvm_get_fallback_smmu_info(cpu, info);
 298 }
 299
 300 static long getrampagesize(void)
 301 {
 302     struct statfs fs;
 303     int ret;
 304
 305     if (!mem_path) {
 306         /* guest RAM is backed by normal anonymous pages */
 307         return getpagesize();
 308     }
 309
 310     do {
 311         ret = statfs(mem_path, &fs);
 312     } while (ret != 0 && errno == EINTR);
 313
 314     if (ret != 0) {
 315         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 316                 strerror(errno));
 317         exit(1);
 318     }
 319
 320 #define HUGETLBFS_MAGIC       0x958458f6
 321
 322     if (fs.f_type != HUGETLBFS_MAGIC) {
 323         /* Explicit mempath, but it's ordinary pages */
 324         return getpagesize();
 325     }
 326
 327     /* It's hugepage, return the huge page size */
 328     return fs.f_bsize;
 329 }
 330
 331 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 332 {
 333     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 334         return true;
 335     }
 336
 337     return (1ul << shift) <= rampgsize;
 338 }
 339
 340 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 341 {
 342     static struct kvm_ppc_smmu_info smmu_info;
 343     static bool has_smmu_info;
 344     CPUPPCState *env = &cpu->env;
 345     long rampagesize;
 346     int iq, ik, jq, jk;
 347
 348     /* We only handle page sizes for 64-bit server guests for now */
 349     if (!(env->mmu_model & POWERPC_MMU_64)) {
 350         return;
 351     }
 352
 353     /* Collect MMU info from kernel if not already */
 354     if (!has_smmu_info) {
 355         kvm_get_smmu_info(cpu, &smmu_info);
 356         has_smmu_info = true;
 357     }
 358
 359     rampagesize = getrampagesize();
 360
 361     /* Convert to QEMU form */
 362     memset(&env->sps, 0, sizeof(env->sps));
 363
 364     /*
 365      * XXX This loop should be an entry wide AND of the capabilities that
 366      *     the selected CPU has with the capabilities that KVM supports.
 367      */
 368     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 369         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 370         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 371
 372         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 373                                  ksps->page_shift)) {
 374             continue;
 375         }
 376         qsps->page_shift = ksps->page_shift;
 377         qsps->slb_enc = ksps->slb_enc;
 378         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 379             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 380                                      ksps->enc[jk].page_shift)) {
 381                 continue;
 382             }
 383             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 384             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 385             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 386                 break;
 387             }
 388         }
 389         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 390             break;
 391         }
 392     }
 393     env->slb_nr = smmu_info.slb_size;
 394     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 395         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 396     }
 397 }
 398 #else /* defined (TARGET_PPC64) */
 399
 400 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 401 {
 402 }
 403
 404 #endif /* !defined (TARGET_PPC64) */
 405
 406 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 407 {
 408     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 409 }
 410
 411 int kvm_arch_init_vcpu(CPUState *cs)
 412 {
 413     PowerPCCPU *cpu = POWERPC_CPU(cs);
 414     CPUPPCState *cenv = &cpu->env;
 415     int ret;
 416
 417     /* Gather server mmu info from KVM and update the CPU state */
 418     kvm_fixup_page_sizes(cpu);
 419
 420     /* Synchronize sregs with kvm */
 421     ret = kvm_arch_sync_sregs(cpu);
 422     if (ret) {
 423         return ret;
 424     }
 425
 426     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 427
 428     /* Some targets support access to KVM's guest TLB. */
 429     switch (cenv->mmu_model) {
 430     case POWERPC_MMU_BOOKE206:
 431         ret = kvm_booke206_tlb_init(cpu);
 432         break;
 433     default:
 434         break;
 435     }
 436
 437     return ret;
 438 }
 439
 440 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 441 {
 442     CPUPPCState *env = &cpu->env;
 443     CPUState *cs = CPU(cpu);
 444     struct kvm_dirty_tlb dirty_tlb;
 445     unsigned char *bitmap;
 446     int ret;
 447
 448     if (!env->kvm_sw_tlb) {
 449         return;
 450     }
 451
 452     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 453     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 454
 455     dirty_tlb.bitmap = (uintptr_t)bitmap;
 456     dirty_tlb.num_dirty = env->nb_tlb;
 457
 458     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 459     if (ret) {
 460         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 461                 __func__, strerror(-ret));
 462     }
 463
 464     g_free(bitmap);
 465 }
 466
 467 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 468 {
 469     PowerPCCPU *cpu = POWERPC_CPU(cs);
 470     CPUPPCState *env = &cpu->env;
 471     union {
 472         uint32_t u32;
 473         uint64_t u64;
 474     } val;
 475     struct kvm_one_reg reg = {
 476         .id = id,
 477         .addr = (uintptr_t) &val,
 478     };
 479     int ret;
 480
 481     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 482     if (ret != 0) {
 483         trace_kvm_failed_spr_get(spr, strerror(errno));
 484     } else {
 485         switch (id & KVM_REG_SIZE_MASK) {
 486         case KVM_REG_SIZE_U32:
 487             env->spr[spr] = val.u32;
 488             break;
 489
 490         case KVM_REG_SIZE_U64:
 491             env->spr[spr] = val.u64;
 492             break;
 493
 494         default:
 495             /* Don't handle this size yet */
 496             abort();
 497         }
 498     }
 499 }
 500
 501 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 502 {
 503     PowerPCCPU *cpu = POWERPC_CPU(cs);
 504     CPUPPCState *env = &cpu->env;
 505     union {
 506         uint32_t u32;
 507         uint64_t u64;
 508     } val;
 509     struct kvm_one_reg reg = {
 510         .id = id,
 511         .addr = (uintptr_t) &val,
 512     };
 513     int ret;
 514
 515     switch (id & KVM_REG_SIZE_MASK) {
 516     case KVM_REG_SIZE_U32:
 517         val.u32 = env->spr[spr];
 518         break;
 519
 520     case KVM_REG_SIZE_U64:
 521         val.u64 = env->spr[spr];
 522         break;
 523
 524     default:
 525         /* Don't handle this size yet */
 526         abort();
 527     }
 528
 529     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 530     if (ret != 0) {
 531         trace_kvm_failed_spr_set(spr, strerror(errno));
 532     }
 533 }
 534
 535 static int kvm_put_fp(CPUState *cs)
 536 {
 537     PowerPCCPU *cpu = POWERPC_CPU(cs);
 538     CPUPPCState *env = &cpu->env;
 539     struct kvm_one_reg reg;
 540     int i;
 541     int ret;
 542
 543     if (env->insns_flags & PPC_FLOAT) {
 544         uint64_t fpscr = env->fpscr;
 545         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 546
 547         reg.id = KVM_REG_PPC_FPSCR;
 548         reg.addr = (uintptr_t)&fpscr;
 549         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 550         if (ret < 0) {
 551             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 552             return ret;
 553         }
 554
 555         for (i = 0; i < 32; i++) {
 556             uint64_t vsr[2];
 557
 558             vsr[0] = float64_val(env->fpr[i]);
 559             vsr[1] = env->vsr[i];
 560             reg.addr = (uintptr_t) &vsr;
 561             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 562
 563             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 564             if (ret < 0) {
 565                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 566                         i, strerror(errno));
 567                 return ret;
 568             }
 569         }
 570     }
 571
 572     if (env->insns_flags & PPC_ALTIVEC) {
 573         reg.id = KVM_REG_PPC_VSCR;
 574         reg.addr = (uintptr_t)&env->vscr;
 575         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 576         if (ret < 0) {
 577             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 578             return ret;
 579         }
 580
 581         for (i = 0; i < 32; i++) {
 582             reg.id = KVM_REG_PPC_VR(i);
 583             reg.addr = (uintptr_t)&env->avr[i];
 584             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 585             if (ret < 0) {
 586                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 587                 return ret;
 588             }
 589         }
 590     }
 591
 592     return 0;
 593 }
 594
 595 static int kvm_get_fp(CPUState *cs)
 596 {
 597     PowerPCCPU *cpu = POWERPC_CPU(cs);
 598     CPUPPCState *env = &cpu->env;
 599     struct kvm_one_reg reg;
 600     int i;
 601     int ret;
 602
 603     if (env->insns_flags & PPC_FLOAT) {
 604         uint64_t fpscr;
 605         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 606
 607         reg.id = KVM_REG_PPC_FPSCR;
 608         reg.addr = (uintptr_t)&fpscr;
 609         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 610         if (ret < 0) {
 611             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 612             return ret;
 613         } else {
 614             env->fpscr = fpscr;
 615         }
 616
 617         for (i = 0; i < 32; i++) {
 618             uint64_t vsr[2];
 619
 620             reg.addr = (uintptr_t) &vsr;
 621             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 622
 623             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 624             if (ret < 0) {
 625                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 626                         vsx ? "VSR" : "FPR", i, strerror(errno));
 627                 return ret;
 628             } else {
 629                 env->fpr[i] = vsr[0];
 630                 if (vsx) {
 631                     env->vsr[i] = vsr[1];
 632                 }
 633             }
 634         }
 635     }
 636
 637     if (env->insns_flags & PPC_ALTIVEC) {
 638         reg.id = KVM_REG_PPC_VSCR;
 639         reg.addr = (uintptr_t)&env->vscr;
 640         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 641         if (ret < 0) {
 642             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 643             return ret;
 644         }
 645
 646         for (i = 0; i < 32; i++) {
 647             reg.id = KVM_REG_PPC_VR(i);
 648             reg.addr = (uintptr_t)&env->avr[i];
 649             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 650             if (ret < 0) {
 651                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 652                         i, strerror(errno));
 653                 return ret;
 654             }
 655         }
 656     }
 657
 658     return 0;
 659 }
 660
 661 #if defined(TARGET_PPC64)
 662 static int kvm_get_vpa(CPUState *cs)
 663 {
 664     PowerPCCPU *cpu = POWERPC_CPU(cs);
 665     CPUPPCState *env = &cpu->env;
 666     struct kvm_one_reg reg;
 667     int ret;
 668
 669     reg.id = KVM_REG_PPC_VPA_ADDR;
 670     reg.addr = (uintptr_t)&env->vpa_addr;
 671     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 672     if (ret < 0) {
 673         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 674         return ret;
 675     }
 676
 677     assert((uintptr_t)&env->slb_shadow_size
 678            == ((uintptr_t)&env->slb_shadow_addr + 8));
 679     reg.id = KVM_REG_PPC_VPA_SLB;
 680     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 681     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 682     if (ret < 0) {
 683         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 684                 strerror(errno));
 685         return ret;
 686     }
 687
 688     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 689     reg.id = KVM_REG_PPC_VPA_DTL;
 690     reg.addr = (uintptr_t)&env->dtl_addr;
 691     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 692     if (ret < 0) {
 693         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 694                 strerror(errno));
 695         return ret;
 696     }
 697
 698     return 0;
 699 }
 700
 701 static int kvm_put_vpa(CPUState *cs)
 702 {
 703     PowerPCCPU *cpu = POWERPC_CPU(cs);
 704     CPUPPCState *env = &cpu->env;
 705     struct kvm_one_reg reg;
 706     int ret;
 707
 708     /* SLB shadow or DTL can't be registered unless a master VPA is
 709      * registered.  That means when restoring state, if a VPA *is*
 710      * registered, we need to set that up first.  If not, we need to
 711      * deregister the others before deregistering the master VPA */
 712     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 713
 714     if (env->vpa_addr) {
 715         reg.id = KVM_REG_PPC_VPA_ADDR;
 716         reg.addr = (uintptr_t)&env->vpa_addr;
 717         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 718         if (ret < 0) {
 719             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 720             return ret;
 721         }
 722     }
 723
 724     assert((uintptr_t)&env->slb_shadow_size
 725            == ((uintptr_t)&env->slb_shadow_addr + 8));
 726     reg.id = KVM_REG_PPC_VPA_SLB;
 727     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 728     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 729     if (ret < 0) {
 730         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 731         return ret;
 732     }
 733
 734     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 735     reg.id = KVM_REG_PPC_VPA_DTL;
 736     reg.addr = (uintptr_t)&env->dtl_addr;
 737     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 738     if (ret < 0) {
 739         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 740                 strerror(errno));
 741         return ret;
 742     }
 743
 744     if (!env->vpa_addr) {
 745         reg.id = KVM_REG_PPC_VPA_ADDR;
 746         reg.addr = (uintptr_t)&env->vpa_addr;
 747         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 748         if (ret < 0) {
 749             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 750             return ret;
 751         }
 752     }
 753
 754     return 0;
 755 }
 756 #endif /* TARGET_PPC64 */
 757
 758 int kvm_arch_put_registers(CPUState *cs, int level)
 759 {
 760     PowerPCCPU *cpu = POWERPC_CPU(cs);
 761     CPUPPCState *env = &cpu->env;
 762     struct kvm_regs regs;
 763     int ret;
 764     int i;
 765
 766     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 767     if (ret < 0) {
 768         return ret;
 769     }
 770
 771     regs.ctr = env->ctr;
 772     regs.lr  = env->lr;
 773     regs.xer = cpu_read_xer(env);
 774     regs.msr = env->msr;
 775     regs.pc = env->nip;
 776
 777     regs.srr0 = env->spr[SPR_SRR0];
 778     regs.srr1 = env->spr[SPR_SRR1];
 779
 780     regs.sprg0 = env->spr[SPR_SPRG0];
 781     regs.sprg1 = env->spr[SPR_SPRG1];
 782     regs.sprg2 = env->spr[SPR_SPRG2];
 783     regs.sprg3 = env->spr[SPR_SPRG3];
 784     regs.sprg4 = env->spr[SPR_SPRG4];
 785     regs.sprg5 = env->spr[SPR_SPRG5];
 786     regs.sprg6 = env->spr[SPR_SPRG6];
 787     regs.sprg7 = env->spr[SPR_SPRG7];
 788
 789     regs.pid = env->spr[SPR_BOOKE_PID];
 790
 791     for (i = 0;i < 32; i++)
 792         regs.gpr[i] = env->gpr[i];
 793
 794     regs.cr = 0;
 795     for (i = 0; i < 8; i++) {
 796         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 797     }
 798
 799     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 800     if (ret < 0)
 801         return ret;
 802
 803     kvm_put_fp(cs);
 804
 805     if (env->tlb_dirty) {
 806         kvm_sw_tlb_put(cpu);
 807         env->tlb_dirty = false;
 808     }
 809
 810     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 811         struct kvm_sregs sregs;
 812
 813         sregs.pvr = env->spr[SPR_PVR];
 814
 815         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 816
 817         /* Sync SLB */
 818 #ifdef TARGET_PPC64
 819         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 820             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 821             if (env->slb[i].esid & SLB_ESID_V) {
 822                 sregs.u.s.ppc64.slb[i].slbe |= i;
 823             }
 824             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 825         }
 826 #endif
 827
 828         /* Sync SRs */
 829         for (i = 0; i < 16; i++) {
 830             sregs.u.s.ppc32.sr[i] = env->sr[i];
 831         }
 832
 833         /* Sync BATs */
 834         for (i = 0; i < 8; i++) {
 835             /* Beware. We have to swap upper and lower bits here */
 836             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 837                 | env->DBAT[1][i];
 838             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 839                 | env->IBAT[1][i];
 840         }
 841
 842         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 843         if (ret) {
 844             return ret;
 845         }
 846     }
 847
 848     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 849         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 850     }
 851
 852     if (cap_one_reg) {
 853         int i;
 854
 855         /* We deliberately ignore errors here, for kernels which have
 856          * the ONE_REG calls, but don't support the specific
 857          * registers, there's a reasonable chance things will still
 858          * work, at least until we try to migrate. */
 859         for (i = 0; i < 1024; i++) {
 860             uint64_t id = env->spr_cb[i].one_reg_id;
 861
 862             if (id != 0) {
 863                 kvm_put_one_spr(cs, id, i);
 864             }
 865         }
 866
 867 #ifdef TARGET_PPC64
 868         if (msr_ts) {
 869             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 870                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 871             }
 872             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 873                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 874             }
 875             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 876             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 877             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 878             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 879             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 880             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 881             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 882             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 883             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 884             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 885         }
 886
 887         if (cap_papr) {
 888             if (kvm_put_vpa(cs) < 0) {
 889                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
 890             }
 891         }
 892
 893         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 894 #endif /* TARGET_PPC64 */
 895     }
 896
 897     return ret;
 898 }
 899
 900 int kvm_arch_get_registers(CPUState *cs)
 901 {
 902     PowerPCCPU *cpu = POWERPC_CPU(cs);
 903     CPUPPCState *env = &cpu->env;
 904     struct kvm_regs regs;
 905     struct kvm_sregs sregs;
 906     uint32_t cr;
 907     int i, ret;
 908
 909     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 910     if (ret < 0)
 911         return ret;
 912
 913     cr = regs.cr;
 914     for (i = 7; i >= 0; i--) {
 915         env->crf[i] = cr & 15;
 916         cr >>= 4;
 917     }
 918
 919     env->ctr = regs.ctr;
 920     env->lr = regs.lr;
 921     cpu_write_xer(env, regs.xer);
 922     env->msr = regs.msr;
 923     env->nip = regs.pc;
 924
 925     env->spr[SPR_SRR0] = regs.srr0;
 926     env->spr[SPR_SRR1] = regs.srr1;
 927
 928     env->spr[SPR_SPRG0] = regs.sprg0;
 929     env->spr[SPR_SPRG1] = regs.sprg1;
 930     env->spr[SPR_SPRG2] = regs.sprg2;
 931     env->spr[SPR_SPRG3] = regs.sprg3;
 932     env->spr[SPR_SPRG4] = regs.sprg4;
 933     env->spr[SPR_SPRG5] = regs.sprg5;
 934     env->spr[SPR_SPRG6] = regs.sprg6;
 935     env->spr[SPR_SPRG7] = regs.sprg7;
 936
 937     env->spr[SPR_BOOKE_PID] = regs.pid;
 938
 939     for (i = 0;i < 32; i++)
 940         env->gpr[i] = regs.gpr[i];
 941
 942     kvm_get_fp(cs);
 943
 944     if (cap_booke_sregs) {
 945         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 946         if (ret < 0) {
 947             return ret;
 948         }
 949
 950         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 951             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 952             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 953             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 954             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
 955             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
 956             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
 957             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
 958             env->spr[SPR_DECR] = sregs.u.e.dec;
 959             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
 960             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
 961             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
 962         }
 963
 964         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
 965             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
 966             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
 967             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
 968             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
 969             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
 970         }
 971
 972         if (sregs.u.e.features & KVM_SREGS_E_64) {
 973             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
 974         }
 975
 976         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
 977             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
 978         }
 979
 980         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
 981             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
 982             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
 983             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
 984             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
 985             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
 986             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
 987             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
 988             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
 989             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
 990             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
 991             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
 992             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
 993             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
 994             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
 995             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
 996             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
 997
 998             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
 999                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1000                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1001                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1002             }
1003
1004             if (sregs.u.e.features & KVM_SREGS_E_PM) {
1005                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1006             }
1007
1008             if (sregs.u.e.features & KVM_SREGS_E_PC) {
1009                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1010                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1011             }
1012         }
1013
1014         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1015             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1016             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1017             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1018             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1019             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1020             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1021             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1022             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1023             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1024             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1025         }
1026
1027         if (sregs.u.e.features & KVM_SREGS_EXP) {
1028             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1029         }
1030
1031         if (sregs.u.e.features & KVM_SREGS_E_PD) {
1032             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1033             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1034         }
1035
1036         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1037             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1038             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1039             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1040
1041             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1042                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1043                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1044             }
1045         }
1046     }
1047
1048     if (cap_segstate) {
1049         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1050         if (ret < 0) {
1051             return ret;
1052         }
1053
1054         if (!env->external_htab) {
1055             ppc_store_sdr1(env, sregs.u.s.sdr1);
1056         }
1057
1058         /* Sync SLB */
1059 #ifdef TARGET_PPC64
1060         /*
1061          * The packed SLB array we get from KVM_GET_SREGS only contains
1062          * information about valid entries. So we flush our internal
1063          * copy to get rid of stale ones, then put all valid SLB entries
1064          * back in.
1065          */
1066         memset(env->slb, 0, sizeof(env->slb));
1067         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1068             target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1069             target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1070             /*
1071              * Only restore valid entries
1072              */
1073             if (rb & SLB_ESID_V) {
1074                 ppc_store_slb(env, rb, rs);
1075             }
1076         }
1077 #endif
1078
1079         /* Sync SRs */
1080         for (i = 0; i < 16; i++) {
1081             env->sr[i] = sregs.u.s.ppc32.sr[i];
1082         }
1083
1084         /* Sync BATs */
1085         for (i = 0; i < 8; i++) {
1086             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1087             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1088             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1089             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1090         }
1091     }
1092
1093     if (cap_hior) {
1094         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1095     }
1096
1097     if (cap_one_reg) {
1098         int i;
1099
1100         /* We deliberately ignore errors here, for kernels which have
1101          * the ONE_REG calls, but don't support the specific
1102          * registers, there's a reasonable chance things will still
1103          * work, at least until we try to migrate. */
1104         for (i = 0; i < 1024; i++) {
1105             uint64_t id = env->spr_cb[i].one_reg_id;
1106
1107             if (id != 0) {
1108                 kvm_get_one_spr(cs, id, i);
1109             }
1110         }
1111
1112 #ifdef TARGET_PPC64
1113         if (msr_ts) {
1114             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1115                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1116             }
1117             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1118                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1119             }
1120             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1121             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1122             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1123             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1124             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1125             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1126             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1127             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1128             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1129             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1130         }
1131
1132         if (cap_papr) {
1133             if (kvm_get_vpa(cs) < 0) {
1134                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1135             }
1136         }
1137
1138         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1139 #endif
1140     }
1141
1142     return 0;
1143 }
1144
1145 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1146 {
1147     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1148
1149     if (irq != PPC_INTERRUPT_EXT) {
1150         return 0;
1151     }
1152
1153     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1154         return 0;
1155     }
1156
1157     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1158
1159     return 0;
1160 }
1161
1162 #if defined(TARGET_PPCEMB)
1163 #define PPC_INPUT_INT PPC40x_INPUT_INT
1164 #elif defined(TARGET_PPC64)
1165 #define PPC_INPUT_INT PPC970_INPUT_INT
1166 #else
1167 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1168 #endif
1169
1170 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1171 {
1172     PowerPCCPU *cpu = POWERPC_CPU(cs);
1173     CPUPPCState *env = &cpu->env;
1174     int r;
1175     unsigned irq;
1176
1177     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1178      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1179     if (!cap_interrupt_level &&
1180         run->ready_for_interrupt_injection &&
1181         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1182         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1183     {
1184         /* For now KVM disregards the 'irq' argument. However, in the
1185          * future KVM could cache it in-kernel to avoid a heavyweight exit
1186          * when reading the UIC.
1187          */
1188         irq = KVM_INTERRUPT_SET;
1189
1190         DPRINTF("injected interrupt %d\n", irq);
1191         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1192         if (r < 0) {
1193             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1194         }
1195
1196         /* Always wake up soon in case the interrupt was level based */
1197         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1198                        (get_ticks_per_sec() / 50));
1199     }
1200
1201     /* We don't know if there are more interrupts pending after this. However,
1202      * the guest will return to userspace in the course of handling this one
1203      * anyways, so we will get a chance to deliver the rest. */
1204 }
1205
1206 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1207 {
1208 }
1209
1210 int kvm_arch_process_async_events(CPUState *cs)
1211 {
1212     return cs->halted;
1213 }
1214
1215 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1216 {
1217     CPUState *cs = CPU(cpu);
1218     CPUPPCState *env = &cpu->env;
1219
1220     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1221         cs->halted = 1;
1222         cs->exception_index = EXCP_HLT;
1223     }
1224
1225     return 0;
1226 }
1227
1228 /* map dcr access to existing qemu dcr emulation */
1229 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1230 {
1231     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1232         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1233
1234     return 0;
1235 }
1236
1237 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1238 {
1239     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1240         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1241
1242     return 0;
1243 }
1244
1245 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1246 {
1247     PowerPCCPU *cpu = POWERPC_CPU(cs);
1248     CPUPPCState *env = &cpu->env;
1249     int ret;
1250
1251     switch (run->exit_reason) {
1252     case KVM_EXIT_DCR:
1253         if (run->dcr.is_write) {
1254             DPRINTF("handle dcr write\n");
1255             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1256         } else {
1257             DPRINTF("handle dcr read\n");
1258             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1259         }
1260         break;
1261     case KVM_EXIT_HLT:
1262         DPRINTF("handle halt\n");
1263         ret = kvmppc_handle_halt(cpu);
1264         break;
1265 #if defined(TARGET_PPC64)
1266     case KVM_EXIT_PAPR_HCALL:
1267         DPRINTF("handle PAPR hypercall\n");
1268         run->papr_hcall.ret = spapr_hypercall(cpu,
1269                                               run->papr_hcall.nr,
1270                                               run->papr_hcall.args);
1271         ret = 0;
1272         break;
1273 #endif
1274     case KVM_EXIT_EPR:
1275         DPRINTF("handle epr\n");
1276         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1277         ret = 0;
1278         break;
1279     case KVM_EXIT_WATCHDOG:
1280         DPRINTF("handle watchdog expiry\n");
1281         watchdog_perform_action();
1282         ret = 0;
1283         break;
1284
1285     default:
1286         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1287         ret = -1;
1288         break;
1289     }
1290
1291     return ret;
1292 }
1293
1294 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1295 {
1296     CPUState *cs = CPU(cpu);
1297     uint32_t bits = tsr_bits;
1298     struct kvm_one_reg reg = {
1299         .id = KVM_REG_PPC_OR_TSR,
1300         .addr = (uintptr_t) &bits,
1301     };
1302
1303     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1304 }
1305
1306 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1307 {
1308
1309     CPUState *cs = CPU(cpu);
1310     uint32_t bits = tsr_bits;
1311     struct kvm_one_reg reg = {
1312         .id = KVM_REG_PPC_CLEAR_TSR,
1313         .addr = (uintptr_t) &bits,
1314     };
1315
1316     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1317 }
1318
1319 int kvmppc_set_tcr(PowerPCCPU *cpu)
1320 {
1321     CPUState *cs = CPU(cpu);
1322     CPUPPCState *env = &cpu->env;
1323     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1324
1325     struct kvm_one_reg reg = {
1326         .id = KVM_REG_PPC_TCR,
1327         .addr = (uintptr_t) &tcr,
1328     };
1329
1330     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1331 }
1332
1333 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1334 {
1335     CPUState *cs = CPU(cpu);
1336     int ret;
1337
1338     if (!kvm_enabled()) {
1339         return -1;
1340     }
1341
1342     if (!cap_ppc_watchdog) {
1343         printf("warning: KVM does not support watchdog");
1344         return -1;
1345     }
1346
1347     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1348     if (ret < 0) {
1349         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1350                 __func__, strerror(-ret));
1351         return ret;
1352     }
1353
1354     return ret;
1355 }
1356
1357 static int read_cpuinfo(const char *field, char *value, int len)
1358 {
1359     FILE *f;
1360     int ret = -1;
1361     int field_len = strlen(field);
1362     char line[512];
1363
1364     f = fopen("/proc/cpuinfo", "r");
1365     if (!f) {
1366         return -1;
1367     }
1368
1369     do {
1370         if(!fgets(line, sizeof(line), f)) {
1371             break;
1372         }
1373         if (!strncmp(line, field, field_len)) {
1374             pstrcpy(value, len, line);
1375             ret = 0;
1376             break;
1377         }
1378     } while(*line);
1379
1380     fclose(f);
1381
1382     return ret;
1383 }
1384
1385 uint32_t kvmppc_get_tbfreq(void)
1386 {
1387     char line[512];
1388     char *ns;
1389     uint32_t retval = get_ticks_per_sec();
1390
1391     if (read_cpuinfo("timebase", line, sizeof(line))) {
1392         return retval;
1393     }
1394
1395     if (!(ns = strchr(line, ':'))) {
1396         return retval;
1397     }
1398
1399     ns++;
1400
1401     retval = atoi(ns);
1402     return retval;
1403 }
1404
1405 /* Try to find a device tree node for a CPU with clock-frequency property */
1406 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1407 {
1408     struct dirent *dirp;
1409     DIR *dp;
1410
1411     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1412         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1413         return -1;
1414     }
1415
1416     buf[0] = '\0';
1417     while ((dirp = readdir(dp)) != NULL) {
1418         FILE *f;
1419         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1420                  dirp->d_name);
1421         f = fopen(buf, "r");
1422         if (f) {
1423             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1424             fclose(f);
1425             break;
1426         }
1427         buf[0] = '\0';
1428     }
1429     closedir(dp);
1430     if (buf[0] == '\0') {
1431         printf("Unknown host!\n");
1432         return -1;
1433     }
1434
1435     return 0;
1436 }
1437
1438 /* Read a CPU node property from the host device tree that's a single
1439  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1440  * (can't find or open the property, or doesn't understand the
1441  * format) */
1442 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1443 {
1444     char buf[PATH_MAX];
1445     union {
1446         uint32_t v32;
1447         uint64_t v64;
1448     } u;
1449     FILE *f;
1450     int len;
1451
1452     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1453         return -1;
1454     }
1455
1456     strncat(buf, "/", sizeof(buf) - strlen(buf));
1457     strncat(buf, propname, sizeof(buf) - strlen(buf));
1458
1459     f = fopen(buf, "rb");
1460     if (!f) {
1461         return -1;
1462     }
1463
1464     len = fread(&u, 1, sizeof(u), f);
1465     fclose(f);
1466     switch (len) {
1467     case 4:
1468         /* property is a 32-bit quantity */
1469         return be32_to_cpu(u.v32);
1470     case 8:
1471         return be64_to_cpu(u.v64);
1472     }
1473
1474     return 0;
1475 }
1476
1477 uint64_t kvmppc_get_clockfreq(void)
1478 {
1479     return kvmppc_read_int_cpu_dt("clock-frequency");
1480 }
1481
1482 uint32_t kvmppc_get_vmx(void)
1483 {
1484     return kvmppc_read_int_cpu_dt("ibm,vmx");
1485 }
1486
1487 uint32_t kvmppc_get_dfp(void)
1488 {
1489     return kvmppc_read_int_cpu_dt("ibm,dfp");
1490 }
1491
1492 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1493  {
1494      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1495      CPUState *cs = CPU(cpu);
1496
1497     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1498         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1499         return 0;
1500     }
1501
1502     return 1;
1503 }
1504
1505 int kvmppc_get_hasidle(CPUPPCState *env)
1506 {
1507     struct kvm_ppc_pvinfo pvinfo;
1508
1509     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1510         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1511         return 1;
1512     }
1513
1514     return 0;
1515 }
1516
1517 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1518 {
1519     uint32_t *hc = (uint32_t*)buf;
1520     struct kvm_ppc_pvinfo pvinfo;
1521
1522     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1523         memcpy(buf, pvinfo.hcall, buf_len);
1524         return 0;
1525     }
1526
1527     /*
1528      * Fallback to always fail hypercalls regardless of endianness:
1529      *
1530      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1531      *     li r3, -1
1532      *     b .+8       (becomes nop in wrong endian)
1533      *     bswap32(li r3, -1)
1534      */
1535
1536     hc[0] = cpu_to_be32(0x08000048);
1537     hc[1] = cpu_to_be32(0x3860ffff);
1538     hc[2] = cpu_to_be32(0x48000008);
1539     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1540
1541     return 0;
1542 }
1543
1544 void kvmppc_set_papr(PowerPCCPU *cpu)
1545 {
1546     CPUState *cs = CPU(cpu);
1547     int ret;
1548
1549     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1550     if (ret) {
1551         cpu_abort(cs, "This KVM version does not support PAPR\n");
1552     }
1553
1554     /* Update the capability flag so we sync the right information
1555      * with kvm */
1556     cap_papr = 1;
1557 }
1558
1559 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1560 {
1561     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1562 }
1563
1564 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1565 {
1566     CPUState *cs = CPU(cpu);
1567     int ret;
1568
1569     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1570     if (ret && mpic_proxy) {
1571         cpu_abort(cs, "This KVM version does not support EPR\n");
1572     }
1573 }
1574
1575 int kvmppc_smt_threads(void)
1576 {
1577     return cap_ppc_smt ? cap_ppc_smt : 1;
1578 }
1579
1580 #ifdef TARGET_PPC64
1581 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1582 {
1583     void *rma;
1584     off_t size;
1585     int fd;
1586     struct kvm_allocate_rma ret;
1587     MemoryRegion *rma_region;
1588
1589     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1590      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1591      *                      not necessary on this hardware
1592      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1593      *
1594      * FIXME: We should allow the user to force contiguous RMA
1595      * allocation in the cap_ppc_rma==1 case.
1596      */
1597     if (cap_ppc_rma < 2) {
1598         return 0;
1599     }
1600
1601     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1602     if (fd < 0) {
1603         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1604                 strerror(errno));
1605         return -1;
1606     }
1607
1608     size = MIN(ret.rma_size, 256ul << 20);
1609
1610     rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1611     if (rma == MAP_FAILED) {
1612         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1613         return -1;
1614     };
1615
1616     rma_region = g_new(MemoryRegion, 1);
1617     memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1618     vmstate_register_ram_global(rma_region);
1619     memory_region_add_subregion(sysmem, 0, rma_region);
1620
1621     return size;
1622 }
1623
1624 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1625 {
1626     struct kvm_ppc_smmu_info info;
1627     long rampagesize, best_page_shift;
1628     int i;
1629
1630     if (cap_ppc_rma >= 2) {
1631         return current_size;
1632     }
1633
1634     /* Find the largest hardware supported page size that's less than
1635      * or equal to the (logical) backing page size of guest RAM */
1636     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1637     rampagesize = getrampagesize();
1638     best_page_shift = 0;
1639
1640     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1641         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1642
1643         if (!sps->page_shift) {
1644             continue;
1645         }
1646
1647         if ((sps->page_shift > best_page_shift)
1648             && ((1UL << sps->page_shift) <= rampagesize)) {
1649             best_page_shift = sps->page_shift;
1650         }
1651     }
1652
1653     return MIN(current_size,
1654                1ULL << (best_page_shift + hash_shift - 7));
1655 }
1656 #endif
1657
1658 bool kvmppc_spapr_use_multitce(void)
1659 {
1660     return cap_spapr_multitce;
1661 }
1662
1663 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1664 {
1665     struct kvm_create_spapr_tce args = {
1666         .liobn = liobn,
1667         .window_size = window_size,
1668     };
1669     long len;
1670     int fd;
1671     void *table;
1672
1673     /* Must set fd to -1 so we don't try to munmap when called for
1674      * destroying the table, which the upper layers -will- do
1675      */
1676     *pfd = -1;
1677     if (!cap_spapr_tce) {
1678         return NULL;
1679     }
1680
1681     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1682     if (fd < 0) {
1683         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1684                 liobn);
1685         return NULL;
1686     }
1687
1688     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1689     /* FIXME: round this up to page size */
1690
1691     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1692     if (table == MAP_FAILED) {
1693         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1694                 liobn);
1695         close(fd);
1696         return NULL;
1697     }
1698
1699     *pfd = fd;
1700     return table;
1701 }
1702
1703 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
1704 {
1705     long len;
1706
1707     if (fd < 0) {
1708         return -1;
1709     }
1710
1711     len = nb_table * sizeof(uint64_t);
1712     if ((munmap(table, len) < 0) ||
1713         (close(fd) < 0)) {
1714         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1715                 strerror(errno));
1716         /* Leak the table */
1717     }
1718
1719     return 0;
1720 }
1721
1722 int kvmppc_reset_htab(int shift_hint)
1723 {
1724     uint32_t shift = shift_hint;
1725
1726     if (!kvm_enabled()) {
1727         /* Full emulation, tell caller to allocate htab itself */
1728         return 0;
1729     }
1730     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1731         int ret;
1732         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1733         if (ret == -ENOTTY) {
1734             /* At least some versions of PR KVM advertise the
1735              * capability, but don't implement the ioctl().  Oops.
1736              * Return 0 so that we allocate the htab in qemu, as is
1737              * correct for PR. */
1738             return 0;
1739         } else if (ret < 0) {
1740             return ret;
1741         }
1742         return shift;
1743     }
1744
1745     /* We have a kernel that predates the htab reset calls.  For PR
1746      * KVM, we need to allocate the htab ourselves, for an HV KVM of
1747      * this era, it has allocated a 16MB fixed size hash table
1748      * already.  Kernels of this era have the GET_PVINFO capability
1749      * only on PR, so we use this hack to determine the right
1750      * answer */
1751     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1752         /* PR - tell caller to allocate htab */
1753         return 0;
1754     } else {
1755         /* HV - assume 16MB kernel allocated htab */
1756         return 24;
1757     }
1758 }
1759
1760 static inline uint32_t mfpvr(void)
1761 {
1762     uint32_t pvr;
1763
1764     asm ("mfpvr %0"
1765          : "=r"(pvr));
1766     return pvr;
1767 }
1768
1769 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1770 {
1771     if (on) {
1772         *word |= flags;
1773     } else {
1774         *word &= ~flags;
1775     }
1776 }
1777
1778 static void kvmppc_host_cpu_initfn(Object *obj)
1779 {
1780     assert(kvm_enabled());
1781 }
1782
1783 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1784 {
1785     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1786     uint32_t vmx = kvmppc_get_vmx();
1787     uint32_t dfp = kvmppc_get_dfp();
1788     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1789     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1790
1791     /* Now fix up the class with information we can query from the host */
1792     pcc->pvr = mfpvr();
1793
1794     if (vmx != -1) {
1795         /* Only override when we know what the host supports */
1796         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1797         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1798     }
1799     if (dfp != -1) {
1800         /* Only override when we know what the host supports */
1801         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1802     }
1803
1804     if (dcache_size != -1) {
1805         pcc->l1_dcache_size = dcache_size;
1806     }
1807
1808     if (icache_size != -1) {
1809         pcc->l1_icache_size = icache_size;
1810     }
1811 }
1812
1813 bool kvmppc_has_cap_epr(void)
1814 {
1815     return cap_epr;
1816 }
1817
1818 bool kvmppc_has_cap_htab_fd(void)
1819 {
1820     return cap_htab_fd;
1821 }
1822
1823 bool kvmppc_has_cap_fixup_hcalls(void)
1824 {
1825     return cap_fixup_hcalls;
1826 }
1827
1828 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
1829 {
1830     ObjectClass *oc = OBJECT_CLASS(pcc);
1831
1832     while (oc && !object_class_is_abstract(oc)) {
1833         oc = object_class_get_parent(oc);
1834     }
1835     assert(oc);
1836
1837     return POWERPC_CPU_CLASS(oc);
1838 }
1839
1840 static int kvm_ppc_register_host_cpu_type(void)
1841 {
1842     TypeInfo type_info = {
1843         .name = TYPE_HOST_POWERPC_CPU,
1844         .instance_init = kvmppc_host_cpu_initfn,
1845         .class_init = kvmppc_host_cpu_class_init,
1846     };
1847     uint32_t host_pvr = mfpvr();
1848     PowerPCCPUClass *pvr_pcc;
1849     DeviceClass *dc;
1850
1851     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1852     if (pvr_pcc == NULL) {
1853         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1854     }
1855     if (pvr_pcc == NULL) {
1856         return -1;
1857     }
1858     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1859     type_register(&type_info);
1860
1861     /* Register generic family CPU class for a family */
1862     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
1863     dc = DEVICE_CLASS(pvr_pcc);
1864     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1865     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
1866     type_register(&type_info);
1867
1868     return 0;
1869 }
1870
1871 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1872 {
1873     struct kvm_rtas_token_args args = {
1874         .token = token,
1875     };
1876
1877     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1878         return -ENOENT;
1879     }
1880
1881     strncpy(args.name, function, sizeof(args.name));
1882
1883     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1884 }
1885
1886 int kvmppc_get_htab_fd(bool write)
1887 {
1888     struct kvm_get_htab_fd s = {
1889         .flags = write ? KVM_GET_HTAB_WRITE : 0,
1890         .start_index = 0,
1891     };
1892
1893     if (!cap_htab_fd) {
1894         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1895         return -1;
1896     }
1897
1898     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1899 }
1900
1901 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1902 {
1903     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1904     uint8_t buf[bufsize];
1905     ssize_t rc;
1906
1907     do {
1908         rc = read(fd, buf, bufsize);
1909         if (rc < 0) {
1910             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1911                     strerror(errno));
1912             return rc;
1913         } else if (rc) {
1914             /* Kernel already retuns data in BE format for the file */
1915             qemu_put_buffer(f, buf, rc);
1916         }
1917     } while ((rc != 0)
1918              && ((max_ns < 0)
1919                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1920
1921     return (rc == 0) ? 1 : 0;
1922 }
1923
1924 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1925                            uint16_t n_valid, uint16_t n_invalid)
1926 {
1927     struct kvm_get_htab_header *buf;
1928     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1929     ssize_t rc;
1930
1931     buf = alloca(chunksize);
1932     /* This is KVM on ppc, so this is all big-endian */
1933     buf->index = index;
1934     buf->n_valid = n_valid;
1935     buf->n_invalid = n_invalid;
1936
1937     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1938
1939     rc = write(fd, buf, chunksize);
1940     if (rc < 0) {
1941         fprintf(stderr, "Error writing KVM hash table: %s\n",
1942                 strerror(errno));
1943         return rc;
1944     }
1945     if (rc != chunksize) {
1946         /* We should never get a short write on a single chunk */
1947         fprintf(stderr, "Short write, restoring KVM hash table\n");
1948         return -1;
1949     }
1950     return 0;
1951 }
1952
1953 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1954 {
1955     return true;
1956 }
1957
1958 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1959 {
1960     return 1;
1961 }
1962
1963 int kvm_arch_on_sigbus(int code, void *addr)
1964 {
1965     return 1;
1966 }
1967
1968 void kvm_arch_init_irq_routing(KVMState *s)
1969 {
1970 }
1971
1972 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1973 {
1974     return -EINVAL;
1975 }
1976
1977 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1978 {
1979     return -EINVAL;
1980 }
1981
1982 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1983 {
1984     return -EINVAL;
1985 }
1986
1987 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1988 {
1989     return -EINVAL;
1990 }
1991
1992 void kvm_arch_remove_all_hw_breakpoints(void)
1993 {
1994 }
1995
1996 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1997 {
1998 }
1999
2000 struct kvm_get_htab_buf {
2001     struct kvm_get_htab_header header;
2002     /*
2003      * We require one extra byte for read
2004      */
2005     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2006 };
2007
2008 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2009 {
2010     int htab_fd;
2011     struct kvm_get_htab_fd ghf;
2012     struct kvm_get_htab_buf  *hpte_buf;
2013
2014     ghf.flags = 0;
2015     ghf.start_index = pte_index;
2016     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2017     if (htab_fd < 0) {
2018         goto error_out;
2019     }
2020
2021     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2022     /*
2023      * Read the hpte group
2024      */
2025     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2026         goto out_close;
2027     }
2028
2029     close(htab_fd);
2030     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2031
2032 out_close:
2033     g_free(hpte_buf);
2034     close(htab_fd);
2035 error_out:
2036     return 0;
2037 }
2038
2039 void kvmppc_hash64_free_pteg(uint64_t token)
2040 {
2041     struct kvm_get_htab_buf *htab_buf;
2042
2043     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2044                             hpte);
2045     g_free(htab_buf);
2046     return;
2047 }
2048
2049 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2050                              target_ulong pte0, target_ulong pte1)
2051 {
2052     int htab_fd;
2053     struct kvm_get_htab_fd ghf;
2054     struct kvm_get_htab_buf hpte_buf;
2055
2056     ghf.flags = 0;
2057     ghf.start_index = 0;     /* Ignored */
2058     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2059     if (htab_fd < 0) {
2060         goto error_out;
2061     }
2062
2063     hpte_buf.header.n_valid = 1;
2064     hpte_buf.header.n_invalid = 0;
2065     hpte_buf.header.index = pte_index;
2066     hpte_buf.hpte[0] = pte0;
2067     hpte_buf.hpte[1] = pte1;
2068     /*
2069      * Write the hpte entry.
2070      * CAUTION: write() has the warn_unused_result attribute. Hence we
2071      * need to check the return value, even though we do nothing.
2072      */
2073     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2074         goto out_close;
2075     }
2076
2077 out_close:
2078     close(htab_fd);
2079     return;
2080
2081 error_out:
2082     return;
2083 }