target/ppc/kvm.c (qemu.git)
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
53 //#define DEBUG_KVM
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60 do { } while (0)
61 #endif
63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66 KVM_CAP_LAST_INFO
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
95 static uint32_t debug_inst_opcode;
97 /* XXX We have a race condition where we actually have a level triggered
98 * interrupt, but the infrastructure can't expose that yet, so the guest
99 * takes but ignores it, goes to sleep and never gets notified that there's
100 * still an interrupt pending.
102 * As a quick workaround, let's just wake up again 20 ms after we injected
103 * an interrupt. That way we can ensure that we're always reinjecting
104 * interrupts in case the guest swallowed them.
106 static QEMUTimer *idle_timer;
108 static void kvm_kick_cpu(void *opaque)
110 PowerPCCPU *cpu = opaque;
112 qemu_cpu_kick(CPU(cpu));
115 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
116 * should only be used for fallback tests - generally we should use
117 * explicit capabilities for the features we want, rather than
118 * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
121 /* Assume KVM-PR if the GET_PVINFO capability is available */
122 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 int kvm_arch_init(MachineState *ms, KVMState *s)
130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143 /* Note: we don't set cap_papr here, because this capability is
144 * only activated after this by kvmppc_set_papr() */
145 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152 kvmppc_get_cpu_characteristics(s);
154 * Note: setting it to false because there is no such capability
155 * in KVM at this moment.
157 * TODO: call kvm_vm_check_extension() with the right capability
158 * after the kernel starts implementing it.*/
159 cap_ppc_pvr_compat = false;
161 if (!cap_interrupt_level) {
162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163 "VM to stall at times!\n");
166 kvm_ppc_register_host_cpu_type(ms);
168 return 0;
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
173 return 0;
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
178 CPUPPCState *cenv = &cpu->env;
179 CPUState *cs = CPU(cpu);
180 struct kvm_sregs sregs;
181 int ret;
183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184 /* What we're really trying to say is "if we're on BookE, we use
185 the native PVR for now". This is the only sane way to check
186 it though, so we potentially mislead users into thinking they can
187 run BookE guests on BookS. Let's hope nobody dares enough :) */
188 return 0;
189 } else {
190 if (!cap_segstate) {
191 fprintf(stderr, "kvm error: missing PVR setting capability\n");
192 return -ENOSYS;
196 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197 if (ret) {
198 return ret;
201 sregs.pvr = cenv->spr[SPR_PVR];
202 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
210 struct kvm_book3e_206_tlb_params params = {};
211 struct kvm_config_tlb cfg = {};
212 unsigned int entries = 0;
213 int ret, i;
215 if (!kvm_enabled() ||
216 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217 return 0;
220 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
222 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223 params.tlb_sizes[i] = booke206_tlb_size(env, i);
224 params.tlb_ways[i] = booke206_tlb_ways(env, i);
225 entries += params.tlb_sizes[i];
228 assert(entries == env->nb_tlb);
229 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
231 env->tlb_dirty = true;
233 cfg.array = (uintptr_t)env->tlb.tlbm;
234 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235 cfg.params = (uintptr_t)&params;
236 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
238 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239 if (ret < 0) {
240 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241 __func__, strerror(-ret));
242 return ret;
245 env->kvm_sw_tlb = true;
246 return 0;
250 #if defined(TARGET_PPC64)
251 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
253 int ret;
255 assert(kvm_state != NULL);
257 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
258 error_setg(errp, "KVM doesn't expose the MMU features it supports");
259 error_append_hint(errp, "Consider switching to a newer KVM\n");
260 return;
263 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
264 if (ret == 0) {
265 return;
268 error_setg_errno(errp, -ret,
269 "KVM failed to provide the MMU features it supports");
272 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
274 KVMState *s = KVM_STATE(current_machine->accelerator);
275 struct ppc_radix_page_info *radix_page_info;
276 struct kvm_ppc_rmmu_info rmmu_info;
277 int i;
279 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
280 return NULL;
282 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
283 return NULL;
285 radix_page_info = g_malloc0(sizeof(*radix_page_info));
286 radix_page_info->count = 0;
287 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
288 if (rmmu_info.ap_encodings[i]) {
289 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
290 radix_page_info->count++;
293 return radix_page_info;
296 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
297 bool radix, bool gtse,
298 uint64_t proc_tbl)
300 CPUState *cs = CPU(cpu);
301 int ret;
302 uint64_t flags = 0;
303 struct kvm_ppc_mmuv3_cfg cfg = {
304 .process_table = proc_tbl,
307 if (radix) {
308 flags |= KVM_PPC_MMUV3_RADIX;
310 if (gtse) {
311 flags |= KVM_PPC_MMUV3_GTSE;
313 cfg.flags = flags;
314 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
315 switch (ret) {
316 case 0:
317 return H_SUCCESS;
318 case -EINVAL:
319 return H_PARAMETER;
320 case -ENODEV:
321 return H_NOT_AVAILABLE;
322 default:
323 return H_HARDWARE;
327 bool kvmppc_hpt_needs_host_contiguous_pages(void)
329 static struct kvm_ppc_smmu_info smmu_info;
331 if (!kvm_enabled()) {
332 return false;
335 kvm_get_smmu_info(&smmu_info, &error_fatal);
336 return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
339 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
341 struct kvm_ppc_smmu_info smmu_info;
342 int iq, ik, jq, jk;
343 Error *local_err = NULL;
345 /* For now, we only have anything to check on hash64 MMUs */
346 if (!cpu->hash64_opts || !kvm_enabled()) {
347 return;
350 kvm_get_smmu_info(&smmu_info, &local_err);
351 if (local_err) {
352 error_propagate(errp, local_err);
353 return;
356 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
357 && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
358 error_setg(errp,
359 "KVM does not support 1TiB segments which guest expects");
360 return;
363 if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
364 error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
365 smmu_info.slb_size, cpu->hash64_opts->slb_size);
366 return;
370 * Verify that every pagesize supported by the cpu model is
371 * supported by KVM with the same encodings
373 for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
374 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
375 struct kvm_ppc_one_seg_page_size *ksps;
377 for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
378 if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
379 break;
382 if (ik >= ARRAY_SIZE(smmu_info.sps)) {
383 error_setg(errp, "KVM doesn't support base page shift %u",
384 qsps->page_shift);
385 return;
388 ksps = &smmu_info.sps[ik];
389 if (ksps->slb_enc != qsps->slb_enc) {
390 error_setg(errp,
391 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
392 ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
393 return;
396 for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
397 for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
398 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
399 break;
403 if (jk >= ARRAY_SIZE(ksps->enc)) {
404 error_setg(errp, "KVM doesn't support page shift %u/%u",
405 qsps->enc[jq].page_shift, qsps->page_shift);
406 return;
408 if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
409 error_setg(errp,
410 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
411 ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
412 qsps->page_shift, qsps->enc[jq].pte_enc);
413 return;
418 if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
419 /* Mostly, which guest page sizes we can use is determined by the
420 * host pages used to map guest RAM, which is handled in the
421 * platform code. Cache-Inhibited largepages (64k) however are
422 * used for I/O, so if they're mapped to the host at all it
423 * will be a normal mapping, not a special hugepage one used
424 * for RAM. */
425 if (getpagesize() < 0x10000) {
426 error_setg(errp,
427 "KVM can't supply 64kiB CI pages, which guest expects");
431 #endif /* !defined (TARGET_PPC64) */
433 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
435 return POWERPC_CPU(cpu)->vcpu_id;
438 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
439 * book3s supports only 1 watchpoint, so array size
440 * of 4 is sufficient for now.
442 #define MAX_HW_BKPTS 4
444 static struct HWBreakpoint {
445 target_ulong addr;
446 int type;
447 } hw_debug_points[MAX_HW_BKPTS];
449 static CPUWatchpoint hw_watchpoint;
451 /* By default, no breakpoints or watchpoints are supported */
452 static int max_hw_breakpoint;
453 static int max_hw_watchpoint;
454 static int nb_hw_breakpoint;
455 static int nb_hw_watchpoint;
457 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
459 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
460 max_hw_breakpoint = 2;
461 max_hw_watchpoint = 2;
464 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
465 fprintf(stderr, "Error initializing h/w breakpoints\n");
466 return;
470 int kvm_arch_init_vcpu(CPUState *cs)
472 PowerPCCPU *cpu = POWERPC_CPU(cs);
473 CPUPPCState *cenv = &cpu->env;
474 int ret;
476 /* Synchronize sregs with kvm */
477 ret = kvm_arch_sync_sregs(cpu);
478 if (ret) {
479 if (ret == -EINVAL) {
480 error_report("Register sync failed... If you're using kvm-hv.ko,"
481 " only \"-cpu host\" is possible");
483 return ret;
486 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
488 switch (cenv->mmu_model) {
489 case POWERPC_MMU_BOOKE206:
490 /* This target supports access to KVM's guest TLB */
491 ret = kvm_booke206_tlb_init(cpu);
492 break;
493 case POWERPC_MMU_2_07:
494 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
495 /* KVM-HV has transactional memory on POWER8 also without the
496 * KVM_CAP_PPC_HTM extension, so enable it here instead as
497 * long as it's available to userspace on the host. */
498 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
499 cap_htm = true;
502 break;
503 default:
504 break;
507 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
508 kvmppc_hw_debug_points_init(cenv);
510 return ret;
513 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
515 CPUPPCState *env = &cpu->env;
516 CPUState *cs = CPU(cpu);
517 struct kvm_dirty_tlb dirty_tlb;
518 unsigned char *bitmap;
519 int ret;
521 if (!env->kvm_sw_tlb) {
522 return;
525 bitmap = g_malloc((env->nb_tlb + 7) / 8);
526 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
528 dirty_tlb.bitmap = (uintptr_t)bitmap;
529 dirty_tlb.num_dirty = env->nb_tlb;
531 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
532 if (ret) {
533 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
534 __func__, strerror(-ret));
537 g_free(bitmap);
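/* Read a single SPR from KVM via the ONE_REG interface and store it in env->spr[spr]. */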
540 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
542 PowerPCCPU *cpu = POWERPC_CPU(cs);
543 CPUPPCState *env = &cpu->env;
544 union {
545 uint32_t u32;
546 uint64_t u64;
547 } val;
548 struct kvm_one_reg reg = {
549 .id = id,
550 .addr = (uintptr_t) &val,
552 int ret;
554 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
555 if (ret != 0) {
556 trace_kvm_failed_spr_get(spr, strerror(errno));
557 } else {
558 switch (id & KVM_REG_SIZE_MASK) {
559 case KVM_REG_SIZE_U32:
560 env->spr[spr] = val.u32;
561 break;
563 case KVM_REG_SIZE_U64:
564 env->spr[spr] = val.u64;
565 break;
567 default:
568 /* Don't handle this size yet */
569 abort();
574 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
576 PowerPCCPU *cpu = POWERPC_CPU(cs);
577 CPUPPCState *env = &cpu->env;
578 union {
579 uint32_t u32;
580 uint64_t u64;
581 } val;
582 struct kvm_one_reg reg = {
583 .id = id,
584 .addr = (uintptr_t) &val,
586 int ret;
588 switch (id & KVM_REG_SIZE_MASK) {
589 case KVM_REG_SIZE_U32:
590 val.u32 = env->spr[spr];
591 break;
593 case KVM_REG_SIZE_U64:
594 val.u64 = env->spr[spr];
595 break;
597 default:
598 /* Don't handle this size yet */
599 abort();
602 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
603 if (ret != 0) {
604 trace_kvm_failed_spr_set(spr, strerror(errno));
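/* Push the guest floating point, VSX and Altivec register state from env into KVM. */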
608 static int kvm_put_fp(CPUState *cs)
610 PowerPCCPU *cpu = POWERPC_CPU(cs);
611 CPUPPCState *env = &cpu->env;
612 struct kvm_one_reg reg;
613 int i;
614 int ret;
616 if (env->insns_flags & PPC_FLOAT) {
617 uint64_t fpscr = env->fpscr;
618 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
620 reg.id = KVM_REG_PPC_FPSCR;
621 reg.addr = (uintptr_t)&fpscr;
622 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
623 if (ret < 0) {
624 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
625 return ret;
628 for (i = 0; i < 32; i++) {
629 uint64_t vsr[2];
631 #ifdef HOST_WORDS_BIGENDIAN
632 vsr[0] = float64_val(env->fpr[i]);
633 vsr[1] = env->vsr[i];
634 #else
635 vsr[0] = env->vsr[i];
636 vsr[1] = float64_val(env->fpr[i]);
637 #endif
638 reg.addr = (uintptr_t) &vsr;
639 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
641 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
642 if (ret < 0) {
643 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
644 i, strerror(errno));
645 return ret;
650 if (env->insns_flags & PPC_ALTIVEC) {
651 reg.id = KVM_REG_PPC_VSCR;
652 reg.addr = (uintptr_t)&env->vscr;
653 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
654 if (ret < 0) {
655 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
656 return ret;
659 for (i = 0; i < 32; i++) {
660 reg.id = KVM_REG_PPC_VR(i);
661 reg.addr = (uintptr_t)&env->avr[i];
662 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
663 if (ret < 0) {
664 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
665 return ret;
670 return 0;
673 static int kvm_get_fp(CPUState *cs)
675 PowerPCCPU *cpu = POWERPC_CPU(cs);
676 CPUPPCState *env = &cpu->env;
677 struct kvm_one_reg reg;
678 int i;
679 int ret;
681 if (env->insns_flags & PPC_FLOAT) {
682 uint64_t fpscr;
683 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
685 reg.id = KVM_REG_PPC_FPSCR;
686 reg.addr = (uintptr_t)&fpscr;
687 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
688 if (ret < 0) {
689 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
690 return ret;
691 } else {
692 env->fpscr = fpscr;
695 for (i = 0; i < 32; i++) {
696 uint64_t vsr[2];
698 reg.addr = (uintptr_t) &vsr;
699 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
701 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
702 if (ret < 0) {
703 DPRINTF("Unable to get %s%d from KVM: %s\n",
704 vsx ? "VSR" : "FPR", i, strerror(errno));
705 return ret;
706 } else {
707 #ifdef HOST_WORDS_BIGENDIAN
708 env->fpr[i] = vsr[0];
709 if (vsx) {
710 env->vsr[i] = vsr[1];
712 #else
713 env->fpr[i] = vsr[1];
714 if (vsx) {
715 env->vsr[i] = vsr[0];
717 #endif
722 if (env->insns_flags & PPC_ALTIVEC) {
723 reg.id = KVM_REG_PPC_VSCR;
724 reg.addr = (uintptr_t)&env->vscr;
725 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
726 if (ret < 0) {
727 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
728 return ret;
731 for (i = 0; i < 32; i++) {
732 reg.id = KVM_REG_PPC_VR(i);
733 reg.addr = (uintptr_t)&env->avr[i];
734 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to get VR%d from KVM: %s\n",
737 i, strerror(errno));
738 return ret;
743 return 0;
746 #if defined(TARGET_PPC64)
747 static int kvm_get_vpa(CPUState *cs)
749 PowerPCCPU *cpu = POWERPC_CPU(cs);
750 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
751 struct kvm_one_reg reg;
752 int ret;
754 reg.id = KVM_REG_PPC_VPA_ADDR;
755 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
756 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
757 if (ret < 0) {
758 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
759 return ret;
762 assert((uintptr_t)&spapr_cpu->slb_shadow_size
763 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
764 reg.id = KVM_REG_PPC_VPA_SLB;
765 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
766 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
767 if (ret < 0) {
768 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
769 strerror(errno));
770 return ret;
773 assert((uintptr_t)&spapr_cpu->dtl_size
774 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
775 reg.id = KVM_REG_PPC_VPA_DTL;
776 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
777 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
778 if (ret < 0) {
779 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
780 strerror(errno));
781 return ret;
784 return 0;
787 static int kvm_put_vpa(CPUState *cs)
789 PowerPCCPU *cpu = POWERPC_CPU(cs);
790 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
791 struct kvm_one_reg reg;
792 int ret;
794 /* SLB shadow or DTL can't be registered unless a master VPA is
795 * registered. That means when restoring state, if a VPA *is*
796 * registered, we need to set that up first. If not, we need to
797 * deregister the others before deregistering the master VPA */
798 assert(spapr_cpu->vpa_addr
799 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
801 if (spapr_cpu->vpa_addr) {
802 reg.id = KVM_REG_PPC_VPA_ADDR;
803 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
804 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
805 if (ret < 0) {
806 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
807 return ret;
811 assert((uintptr_t)&spapr_cpu->slb_shadow_size
812 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
813 reg.id = KVM_REG_PPC_VPA_SLB;
814 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
815 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
816 if (ret < 0) {
817 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
818 return ret;
821 assert((uintptr_t)&spapr_cpu->dtl_size
822 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
823 reg.id = KVM_REG_PPC_VPA_DTL;
824 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
825 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
826 if (ret < 0) {
827 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
828 strerror(errno));
829 return ret;
832 if (!spapr_cpu->vpa_addr) {
833 reg.id = KVM_REG_PPC_VPA_ADDR;
834 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
835 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
836 if (ret < 0) {
837 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
838 return ret;
842 return 0;
844 #endif /* TARGET_PPC64 */
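/* Write the Book3S segment state (SDR1, SLB, segment registers, BATs) to KVM via KVM_SET_SREGS. */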
846 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
848 CPUPPCState *env = &cpu->env;
849 struct kvm_sregs sregs;
850 int i;
852 sregs.pvr = env->spr[SPR_PVR];
854 if (cpu->vhyp) {
855 PPCVirtualHypervisorClass *vhc =
856 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
857 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
858 } else {
859 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
862 /* Sync SLB */
863 #ifdef TARGET_PPC64
864 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
865 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
866 if (env->slb[i].esid & SLB_ESID_V) {
867 sregs.u.s.ppc64.slb[i].slbe |= i;
869 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
871 #endif
873 /* Sync SRs */
874 for (i = 0; i < 16; i++) {
875 sregs.u.s.ppc32.sr[i] = env->sr[i];
878 /* Sync BATs */
879 for (i = 0; i < 8; i++) {
880 /* Beware. We have to swap upper and lower bits here */
881 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
882 | env->DBAT[1][i];
883 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
884 | env->IBAT[1][i];
887 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
890 int kvm_arch_put_registers(CPUState *cs, int level)
892 PowerPCCPU *cpu = POWERPC_CPU(cs);
893 CPUPPCState *env = &cpu->env;
894 struct kvm_regs regs;
895 int ret;
896 int i;
898 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
899 if (ret < 0) {
900 return ret;
903 regs.ctr = env->ctr;
904 regs.lr = env->lr;
905 regs.xer = cpu_read_xer(env);
906 regs.msr = env->msr;
907 regs.pc = env->nip;
909 regs.srr0 = env->spr[SPR_SRR0];
910 regs.srr1 = env->spr[SPR_SRR1];
912 regs.sprg0 = env->spr[SPR_SPRG0];
913 regs.sprg1 = env->spr[SPR_SPRG1];
914 regs.sprg2 = env->spr[SPR_SPRG2];
915 regs.sprg3 = env->spr[SPR_SPRG3];
916 regs.sprg4 = env->spr[SPR_SPRG4];
917 regs.sprg5 = env->spr[SPR_SPRG5];
918 regs.sprg6 = env->spr[SPR_SPRG6];
919 regs.sprg7 = env->spr[SPR_SPRG7];
921 regs.pid = env->spr[SPR_BOOKE_PID];
923 for (i = 0;i < 32; i++)
924 regs.gpr[i] = env->gpr[i];
926 regs.cr = 0;
927 for (i = 0; i < 8; i++) {
928 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
931 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
932 if (ret < 0)
933 return ret;
935 kvm_put_fp(cs);
937 if (env->tlb_dirty) {
938 kvm_sw_tlb_put(cpu);
939 env->tlb_dirty = false;
942 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
943 ret = kvmppc_put_books_sregs(cpu);
944 if (ret < 0) {
945 return ret;
949 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
950 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
953 if (cap_one_reg) {
954 int i;
956 /* We deliberately ignore errors here; for kernels which have
957 * the ONE_REG calls but don't support the specific
958 * registers, there's a reasonable chance things will still
959 * work, at least until we try to migrate. */
960 for (i = 0; i < 1024; i++) {
961 uint64_t id = env->spr_cb[i].one_reg_id;
963 if (id != 0) {
964 kvm_put_one_spr(cs, id, i);
968 #ifdef TARGET_PPC64
969 if (msr_ts) {
970 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
971 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
973 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
974 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
976 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
977 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
978 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
979 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
980 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
981 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
982 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
983 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
984 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
988 if (cap_papr) {
989 if (kvm_put_vpa(cs) < 0) {
990 DPRINTF("Warning: Unable to set VPA information to KVM\n");
994 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
995 #endif /* TARGET_PPC64 */
998 return ret;
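/* Recompute a BookE exception vector from its IVOR offset and the IVPR base. */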
1001 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1003 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1006 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1008 CPUPPCState *env = &cpu->env;
1009 struct kvm_sregs sregs;
1010 int ret;
1012 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1013 if (ret < 0) {
1014 return ret;
1017 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1018 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1019 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1020 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1021 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1022 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1023 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1024 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1025 env->spr[SPR_DECR] = sregs.u.e.dec;
1026 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1027 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1028 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1031 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1032 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1033 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1034 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1035 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1036 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1039 if (sregs.u.e.features & KVM_SREGS_E_64) {
1040 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1043 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1044 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1047 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1048 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1049 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1050 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1051 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1052 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1053 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1054 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1055 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1056 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1057 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1058 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1059 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1060 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1061 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1062 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1063 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1064 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1065 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1066 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1067 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1068 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1069 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1070 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1071 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1072 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1073 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1074 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1075 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1076 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1077 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1078 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1079 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1081 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1082 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1083 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1084 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1085 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1086 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1087 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1090 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1091 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1092 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1095 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1096 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1097 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1098 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1099 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1103 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1104 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1105 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1106 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1107 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1108 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1109 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1110 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1111 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1112 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1113 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1116 if (sregs.u.e.features & KVM_SREGS_EXP) {
1117 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1120 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1121 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1122 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1125 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1126 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1127 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1128 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1130 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1131 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1132 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1136 return 0;
1139 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1141 CPUPPCState *env = &cpu->env;
1142 struct kvm_sregs sregs;
1143 int ret;
1144 int i;
1146 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1147 if (ret < 0) {
1148 return ret;
1151 if (!cpu->vhyp) {
1152 ppc_store_sdr1(env, sregs.u.s.sdr1);
1155 /* Sync SLB */
1156 #ifdef TARGET_PPC64
1158 * The packed SLB array we get from KVM_GET_SREGS only contains
1159 * information about valid entries. So we flush our internal copy
1160 * to get rid of stale ones, then put all valid SLB entries back
1161 * in.
1163 memset(env->slb, 0, sizeof(env->slb));
1164 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1165 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1166 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1168 * Only restore valid entries
1170 if (rb & SLB_ESID_V) {
1171 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1174 #endif
1176 /* Sync SRs */
1177 for (i = 0; i < 16; i++) {
1178 env->sr[i] = sregs.u.s.ppc32.sr[i];
1181 /* Sync BATs */
1182 for (i = 0; i < 8; i++) {
1183 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1184 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1185 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1186 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1189 return 0;
1192 int kvm_arch_get_registers(CPUState *cs)
1194 PowerPCCPU *cpu = POWERPC_CPU(cs);
1195 CPUPPCState *env = &cpu->env;
1196 struct kvm_regs regs;
1197 uint32_t cr;
1198 int i, ret;
1200 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1201 if (ret < 0)
1202 return ret;
1204 cr = regs.cr;
1205 for (i = 7; i >= 0; i--) {
1206 env->crf[i] = cr & 15;
1207 cr >>= 4;
1210 env->ctr = regs.ctr;
1211 env->lr = regs.lr;
1212 cpu_write_xer(env, regs.xer);
1213 env->msr = regs.msr;
1214 env->nip = regs.pc;
1216 env->spr[SPR_SRR0] = regs.srr0;
1217 env->spr[SPR_SRR1] = regs.srr1;
1219 env->spr[SPR_SPRG0] = regs.sprg0;
1220 env->spr[SPR_SPRG1] = regs.sprg1;
1221 env->spr[SPR_SPRG2] = regs.sprg2;
1222 env->spr[SPR_SPRG3] = regs.sprg3;
1223 env->spr[SPR_SPRG4] = regs.sprg4;
1224 env->spr[SPR_SPRG5] = regs.sprg5;
1225 env->spr[SPR_SPRG6] = regs.sprg6;
1226 env->spr[SPR_SPRG7] = regs.sprg7;
1228 env->spr[SPR_BOOKE_PID] = regs.pid;
1230 for (i = 0;i < 32; i++)
1231 env->gpr[i] = regs.gpr[i];
1233 kvm_get_fp(cs);
1235 if (cap_booke_sregs) {
1236 ret = kvmppc_get_booke_sregs(cpu);
1237 if (ret < 0) {
1238 return ret;
1242 if (cap_segstate) {
1243 ret = kvmppc_get_books_sregs(cpu);
1244 if (ret < 0) {
1245 return ret;
1249 if (cap_hior) {
1250 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1253 if (cap_one_reg) {
1254 int i;
1256 /* We deliberately ignore errors here; for kernels which have
1257 * the ONE_REG calls but don't support the specific
1258 * registers, there's a reasonable chance things will still
1259 * work, at least until we try to migrate. */
1260 for (i = 0; i < 1024; i++) {
1261 uint64_t id = env->spr_cb[i].one_reg_id;
1263 if (id != 0) {
1264 kvm_get_one_spr(cs, id, i);
1268 #ifdef TARGET_PPC64
1269 if (msr_ts) {
1270 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1271 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1273 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1274 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1276 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1277 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1278 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1279 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1280 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1281 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1282 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1283 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1284 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1288 if (cap_papr) {
1289 if (kvm_get_vpa(cs) < 0) {
1290 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1294 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1295 #endif
1298 return 0;
1301 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1303 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1305 if (irq != PPC_INTERRUPT_EXT) {
1306 return 0;
1309 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1310 return 0;
1313 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1315 return 0;
1318 #if defined(TARGET_PPCEMB)
1319 #define PPC_INPUT_INT PPC40x_INPUT_INT
1320 #elif defined(TARGET_PPC64)
1321 #define PPC_INPUT_INT PPC970_INPUT_INT
1322 #else
1323 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1324 #endif
1326 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1328 PowerPCCPU *cpu = POWERPC_CPU(cs);
1329 CPUPPCState *env = &cpu->env;
1330 int r;
1331 unsigned irq;
1333 qemu_mutex_lock_iothread();
1335 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1336 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1337 if (!cap_interrupt_level &&
1338 run->ready_for_interrupt_injection &&
1339 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1340 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1342 /* For now KVM disregards the 'irq' argument. However, in the
1343 * future KVM could cache it in-kernel to avoid a heavyweight exit
1344 * when reading the UIC.
1346 irq = KVM_INTERRUPT_SET;
1348 DPRINTF("injected interrupt %d\n", irq);
1349 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1350 if (r < 0) {
1351 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1354 /* Always wake up soon in case the interrupt was level based */
1355 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1356 (NANOSECONDS_PER_SECOND / 50));
1359 /* We don't know if there are more interrupts pending after this. However,
1360 * the guest will return to userspace in the course of handling this one
1361 * anyways, so we will get a chance to deliver the rest. */
1363 qemu_mutex_unlock_iothread();
1366 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1368 return MEMTXATTRS_UNSPECIFIED;
1371 int kvm_arch_process_async_events(CPUState *cs)
1373 return cs->halted;
1376 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1378 CPUState *cs = CPU(cpu);
1379 CPUPPCState *env = &cpu->env;
1381 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1382 cs->halted = 1;
1383 cs->exception_index = EXCP_HLT;
1386 return 0;
1389 /* map dcr access to existing qemu dcr emulation */
1390 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1392 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1393 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1395 return 0;
1398 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1400 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1401 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1403 return 0;
1406 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1408 /* Mixed endian case is not handled */
1409 uint32_t sc = debug_inst_opcode;
1411 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1412 sizeof(sc), 0) ||
1413 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1414 return -EINVAL;
1417 return 0;
1420 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1422 uint32_t sc;
1424 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1425 sc != debug_inst_opcode ||
1426 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1427 sizeof(sc), 1)) {
1428 return -EINVAL;
1431 return 0;
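/* Find a hardware breakpoint/watchpoint with the given address and type in hw_debug_points; returns its index, or -1 if none matches. */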
1434 static int find_hw_breakpoint(target_ulong addr, int type)
1436 int n;
1438 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1439 <= ARRAY_SIZE(hw_debug_points));
1441 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1442 if (hw_debug_points[n].addr == addr &&
1443 hw_debug_points[n].type == type) {
1444 return n;
1448 return -1;
1451 static int find_hw_watchpoint(target_ulong addr, int *flag)
1453 int n;
1455 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1456 if (n >= 0) {
1457 *flag = BP_MEM_ACCESS;
1458 return n;
1461 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1462 if (n >= 0) {
1463 *flag = BP_MEM_WRITE;
1464 return n;
1467 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1468 if (n >= 0) {
1469 *flag = BP_MEM_READ;
1470 return n;
1473 return -1;
1476 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1477 target_ulong len, int type)
1479 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1480 return -ENOBUFS;
1483 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1484 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1486 switch (type) {
1487 case GDB_BREAKPOINT_HW:
1488 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1489 return -ENOBUFS;
1492 if (find_hw_breakpoint(addr, type) >= 0) {
1493 return -EEXIST;
1496 nb_hw_breakpoint++;
1497 break;
1499 case GDB_WATCHPOINT_WRITE:
1500 case GDB_WATCHPOINT_READ:
1501 case GDB_WATCHPOINT_ACCESS:
1502 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1503 return -ENOBUFS;
1506 if (find_hw_breakpoint(addr, type) >= 0) {
1507 return -EEXIST;
1510 nb_hw_watchpoint++;
1511 break;
1513 default:
1514 return -ENOSYS;
1517 return 0;
1520 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1521 target_ulong len, int type)
1523 int n;
1525 n = find_hw_breakpoint(addr, type);
1526 if (n < 0) {
1527 return -ENOENT;
1530 switch (type) {
1531 case GDB_BREAKPOINT_HW:
1532 nb_hw_breakpoint--;
1533 break;
1535 case GDB_WATCHPOINT_WRITE:
1536 case GDB_WATCHPOINT_READ:
1537 case GDB_WATCHPOINT_ACCESS:
1538 nb_hw_watchpoint--;
1539 break;
1541 default:
1542 return -ENOSYS;
1544 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1546 return 0;
1549 void kvm_arch_remove_all_hw_breakpoints(void)
1551 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1554 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1556 int n;
1558 /* Software Breakpoint updates */
1559 if (kvm_sw_breakpoints_active(cs)) {
1560 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1563 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1564 <= ARRAY_SIZE(hw_debug_points));
1565 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1567 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1568 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1569 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1570 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1571 switch (hw_debug_points[n].type) {
1572 case GDB_BREAKPOINT_HW:
1573 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1574 break;
1575 case GDB_WATCHPOINT_WRITE:
1576 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1577 break;
1578 case GDB_WATCHPOINT_READ:
1579 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1580 break;
1581 case GDB_WATCHPOINT_ACCESS:
1582 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1583 KVMPPC_DEBUG_WATCH_READ;
1584 break;
1585 default:
1586 cpu_abort(cs, "Unsupported breakpoint type\n");
1588 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1593 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1595 CPUState *cs = CPU(cpu);
1596 CPUPPCState *env = &cpu->env;
1597 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1598 int handle = 0;
1599 int n;
1600 int flag = 0;
1602 if (cs->singlestep_enabled) {
1603 handle = 1;
1604 } else if (arch_info->status) {
1605 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1606 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1607 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1608 if (n >= 0) {
1609 handle = 1;
1611 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1612 KVMPPC_DEBUG_WATCH_WRITE)) {
1613 n = find_hw_watchpoint(arch_info->address, &flag);
1614 if (n >= 0) {
1615 handle = 1;
1616 cs->watchpoint_hit = &hw_watchpoint;
1617 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1618 hw_watchpoint.flags = flag;
1622 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1623 handle = 1;
1624 } else {
1625 /* QEMU is not able to handle this debug exception, so inject a
1626 * program exception into the guest;
1627 * yes, a program exception, NOT a debug exception !!
1628 * When QEMU is using the debug resources, the debug exception must
1629 * always be delivered to QEMU. To achieve this we set MSR_DE and also
1630 * MSRP_DEP so the guest cannot change MSR_DE.
1631 * When emulating debug resources for the guest, we want the guest
1632 * to control MSR_DE (enabling/disabling the debug interrupt as needed).
1633 * Supporting both configurations is NOT possible,
1634 * so debug resources cannot be shared
1635 * between QEMU and the guest on the BookE architecture.
1636 * In the current design QEMU gets priority over the guest:
1637 * if QEMU is using the debug resources, the guest
1638 * cannot use them.
1639 * For software breakpoints QEMU uses a privileged instruction,
1640 * so there is no way we got here because the guest
1641 * set a debug exception; the only possibility is that the guest
1642 * executed a privileged / illegal instruction, and that is why we are
1643 * injecting a program interrupt.
1646 cpu_synchronize_state(cs);
1647 /* env->nip is PC, so increment this by 4 to use
1648 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1650 env->nip += 4;
1651 cs->exception_index = POWERPC_EXCP_PROGRAM;
1652 env->error_code = POWERPC_EXCP_INVAL;
1653 ppc_cpu_do_interrupt(cs);
1656 return handle;
1659 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1661 PowerPCCPU *cpu = POWERPC_CPU(cs);
1662 CPUPPCState *env = &cpu->env;
1663 int ret;
1665 qemu_mutex_lock_iothread();
1667 switch (run->exit_reason) {
1668 case KVM_EXIT_DCR:
1669 if (run->dcr.is_write) {
1670 DPRINTF("handle dcr write\n");
1671 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1672 } else {
1673 DPRINTF("handle dcr read\n");
1674 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1676 break;
1677 case KVM_EXIT_HLT:
1678 DPRINTF("handle halt\n");
1679 ret = kvmppc_handle_halt(cpu);
1680 break;
1681 #if defined(TARGET_PPC64)
1682 case KVM_EXIT_PAPR_HCALL:
1683 DPRINTF("handle PAPR hypercall\n");
1684 run->papr_hcall.ret = spapr_hypercall(cpu,
1685 run->papr_hcall.nr,
1686 run->papr_hcall.args);
1687 ret = 0;
1688 break;
1689 #endif
1690 case KVM_EXIT_EPR:
1691 DPRINTF("handle epr\n");
1692 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1693 ret = 0;
1694 break;
1695 case KVM_EXIT_WATCHDOG:
1696 DPRINTF("handle watchdog expiry\n");
1697 watchdog_perform_action();
1698 ret = 0;
1699 break;
1701 case KVM_EXIT_DEBUG:
1702 DPRINTF("handle debug exception\n");
1703 if (kvm_handle_debug(cpu, run)) {
1704 ret = EXCP_DEBUG;
1705 break;
1707 /* re-enter, this exception was guest-internal */
1708 ret = 0;
1709 break;
1711 default:
1712 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1713 ret = -1;
1714 break;
1717 qemu_mutex_unlock_iothread();
1718 return ret;
1721 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1723 CPUState *cs = CPU(cpu);
1724 uint32_t bits = tsr_bits;
1725 struct kvm_one_reg reg = {
1726 .id = KVM_REG_PPC_OR_TSR,
1727 .addr = (uintptr_t) &bits,
1730 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1733 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1736 CPUState *cs = CPU(cpu);
1737 uint32_t bits = tsr_bits;
1738 struct kvm_one_reg reg = {
1739 .id = KVM_REG_PPC_CLEAR_TSR,
1740 .addr = (uintptr_t) &bits,
1743 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1746 int kvmppc_set_tcr(PowerPCCPU *cpu)
1748 CPUState *cs = CPU(cpu);
1749 CPUPPCState *env = &cpu->env;
1750 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1752 struct kvm_one_reg reg = {
1753 .id = KVM_REG_PPC_TCR,
1754 .addr = (uintptr_t) &tcr,
1757 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1760 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1762 CPUState *cs = CPU(cpu);
1763 int ret;
1765 if (!kvm_enabled()) {
1766 return -1;
1769 if (!cap_ppc_watchdog) {
1770 printf("warning: KVM does not support watchdog");
1771 return -1;
1774 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1775 if (ret < 0) {
1776 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1777 __func__, strerror(-ret));
1778 return ret;
1781 return ret;
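/* Scan /proc/cpuinfo for a line starting with 'field' and copy that line into 'value'; returns 0 on success, -1 otherwise. */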
1784 static int read_cpuinfo(const char *field, char *value, int len)
1786 FILE *f;
1787 int ret = -1;
1788 int field_len = strlen(field);
1789 char line[512];
1791 f = fopen("/proc/cpuinfo", "r");
1792 if (!f) {
1793 return -1;
1796 do {
1797 if (!fgets(line, sizeof(line), f)) {
1798 break;
1800 if (!strncmp(line, field, field_len)) {
1801 pstrcpy(value, len, line);
1802 ret = 0;
1803 break;
1805 } while(*line);
1807 fclose(f);
1809 return ret;
1812 uint32_t kvmppc_get_tbfreq(void)
1814 char line[512];
1815 char *ns;
1816 uint32_t retval = NANOSECONDS_PER_SECOND;
1818 if (read_cpuinfo("timebase", line, sizeof(line))) {
1819 return retval;
1822 if (!(ns = strchr(line, ':'))) {
1823 return retval;
1826 ns++;
1828 return atoi(ns);
1831 bool kvmppc_get_host_serial(char **value)
1833 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1834 NULL);
1837 bool kvmppc_get_host_model(char **value)
1839 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1842 /* Try to find a device tree node for a CPU with clock-frequency property */
1843 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1845 struct dirent *dirp;
1846 DIR *dp;
1848 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1849 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1850 return -1;
1853 buf[0] = '\0';
1854 while ((dirp = readdir(dp)) != NULL) {
1855 FILE *f;
1856 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1857 dirp->d_name);
1858 f = fopen(buf, "r");
1859 if (f) {
1860 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1861 fclose(f);
1862 break;
1864 buf[0] = '\0';
1866 closedir(dp);
1867 if (buf[0] == '\0') {
1868 printf("Unknown host!\n");
1869 return -1;
1872 return 0;
1875 static uint64_t kvmppc_read_int_dt(const char *filename)
1877 union {
1878 uint32_t v32;
1879 uint64_t v64;
1880 } u;
1881 FILE *f;
1882 int len;
1884 f = fopen(filename, "rb");
1885 if (!f) {
1886 return -1;
1889 len = fread(&u, 1, sizeof(u), f);
1890 fclose(f);
1891 switch (len) {
1892 case 4:
1893 /* property is a 32-bit quantity */
1894 return be32_to_cpu(u.v32);
1895 case 8:
1896 return be64_to_cpu(u.v64);
1899 return 0;
1902 /* Read a CPU node property from the host device tree that's a single
1903 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1904 * (can't find or open the property, or doesn't understand the
1905 * format) */
1906 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1908 char buf[PATH_MAX], *tmp;
1909 uint64_t val;
1911 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1912 return -1;
1915 tmp = g_strdup_printf("%s/%s", buf, propname);
1916 val = kvmppc_read_int_dt(tmp);
1917 g_free(tmp);
1919 return val;
1922 uint64_t kvmppc_get_clockfreq(void)
1924 return kvmppc_read_int_cpu_dt("clock-frequency");
1927 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1929 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1930 CPUState *cs = CPU(cpu);
1932 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1933 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1934 return 0;
1937 return 1;
1940 int kvmppc_get_hasidle(CPUPPCState *env)
1942 struct kvm_ppc_pvinfo pvinfo;
1944 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1945 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1946 return 1;
1949 return 0;
1952 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1954 uint32_t *hc = (uint32_t*)buf;
1955 struct kvm_ppc_pvinfo pvinfo;
1957 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1958 memcpy(buf, pvinfo.hcall, buf_len);
1959 return 0;
1963 * Fallback to always fail hypercalls regardless of endianness:
1965 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1966 * li r3, -1
1967 * b .+8 (becomes nop in wrong endian)
1968 * bswap32(li r3, -1)
1971 hc[0] = cpu_to_be32(0x08000048);
1972 hc[1] = cpu_to_be32(0x3860ffff);
1973 hc[2] = cpu_to_be32(0x48000008);
1974 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1976 return 1;
1979 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1981 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1984 void kvmppc_enable_logical_ci_hcalls(void)
1987 * FIXME: it would be nice if we could detect the cases where
1988 * we're using a device which requires the in kernel
1989 * implementation of these hcalls, but the kernel lacks them and
1990 * produce a warning.
1992 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1993 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1996 void kvmppc_enable_set_mode_hcall(void)
1998 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2001 void kvmppc_enable_clear_ref_mod_hcalls(void)
2003 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2004 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2007 void kvmppc_set_papr(PowerPCCPU *cpu)
2009 CPUState *cs = CPU(cpu);
2010 int ret;
2012 if (!kvm_enabled()) {
2013 return;
2016 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2017 if (ret) {
2018 error_report("This vCPU type or KVM version does not support PAPR");
2019 exit(1);
2022 /* Update the capability flag so we sync the right information
2023 * with kvm */
2024 cap_papr = 1;
2027 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2029 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2032 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2034 CPUState *cs = CPU(cpu);
2035 int ret;
2037 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2038 if (ret && mpic_proxy) {
2039 error_report("This KVM version does not support EPR");
2040 exit(1);
2044 int kvmppc_smt_threads(void)
2046 return cap_ppc_smt ? cap_ppc_smt : 1;
2049 int kvmppc_set_smt_threads(int smt)
2051 int ret;
2053 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2054 if (!ret) {
2055 cap_ppc_smt = smt;
2057 return ret;
2060 void kvmppc_hint_smt_possible(Error **errp)
2062 int i;
2063 GString *g;
2064 char *s;
2066 assert(kvm_enabled());
2067 if (cap_ppc_smt_possible) {
2068 g = g_string_new("Available VSMT modes:");
2069 for (i = 63; i >= 0; i--) {
2070 if ((1UL << i) & cap_ppc_smt_possible) {
2071 g_string_append_printf(g, " %lu", (1UL << i));
2074 s = g_string_free(g, false);
2075 error_append_hint(errp, "%s.\n", s);
2076 g_free(s);
2077 } else {
2078 error_append_hint(errp,
2079 "This KVM seems to be too old to support VSMT.\n");
2084 #ifdef TARGET_PPC64
2085 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2087 struct kvm_ppc_smmu_info info;
2088 long rampagesize, best_page_shift;
2089 int i;
2091 /* Find the largest hardware supported page size that's less than
2092 * or equal to the (logical) backing page size of guest RAM */
2093 kvm_get_smmu_info(&info, &error_fatal);
2094 rampagesize = qemu_getrampagesize();
2095 best_page_shift = 0;
2097 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2098 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2100 if (!sps->page_shift) {
2101 continue;
2104 if ((sps->page_shift > best_page_shift)
2105 && ((1UL << sps->page_shift) <= rampagesize)) {
2106 best_page_shift = sps->page_shift;
2110 return MIN(current_size,
2111 1ULL << (best_page_shift + hash_shift - 7));
2113 #endif
2115 bool kvmppc_spapr_use_multitce(void)
2117 return cap_spapr_multitce;
2120 int kvmppc_spapr_enable_inkernel_multitce(void)
2122 int ret;
2124 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2125 H_PUT_TCE_INDIRECT, 1);
2126 if (!ret) {
2127 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2128 H_STUFF_TCE, 1);
2131 return ret;
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
                              uint64_t bus_offset, uint32_t nb_table,
                              int *pfd, bool need_vfio)
{
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    if (cap_spapr_tce_64) {
        struct kvm_create_spapr_tce_64 args = {
            .liobn = liobn,
            .page_shift = page_shift,
            .offset = bus_offset >> page_shift,
            .size = nb_table,
            .flags = 0
        };
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
        if (fd < 0) {
            fprintf(stderr,
                    "KVM: Failed to create TCE64 table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else if (cap_spapr_tce) {
        uint64_t window_size = (uint64_t) nb_table << page_shift;
        struct kvm_create_spapr_tce args = {
            .liobn = liobn,
            .window_size = window_size,
        };
        if ((window_size != args.window_size) || bus_offset) {
            return NULL;
        }
        fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
        if (fd < 0) {
            fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                    liobn);
            return NULL;
        }
    } else {
        return NULL;
    }

    len = nb_table * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
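
/*
 * (Re)allocate the guest hash page table.  Returns 0 if the caller
 * (QEMU) should allocate the HPT itself, a positive value (the log2 of
 * the HPT size) if the kernel owns the table, or a negative errno on
 * failure.
 */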
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
    alter_insns(&pcc->insns_flags2, PPC2_VSX,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
    alter_insns(&pcc->insns_flags2, PPC2_DFP,
                qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

#if defined(TARGET_PPC64)
    pcc->radix_page_info = kvm_get_radix_page_info();

    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
        /*
         * POWER9 DD1 has some bugs which make it not really ISA 3.00
         * compliant.  More importantly, advertising ISA 3.00
         * architected mode may prevent guests from activating
         * necessary DD1 workarounds.
         */
        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
    }
#endif /* defined(TARGET_PPC64) */
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

bool kvmppc_has_cap_mmu_radix(void)
{
    return cap_mmu_radix;
}

bool kvmppc_has_cap_mmu_hash_v3(void)
{
    return cap_mmu_hash_v3;
}

static bool kvmppc_power8_host(void)
{
    bool ret = false;
#ifdef TARGET_PPC64
    {
        uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
        ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
              (base_pvr == CPU_POWERPC_POWER8_BASE);
    }
#endif /* TARGET_PPC64 */
    return ret;
}
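
/*
 * The parse_cap_ppc_safe_*() helpers translate the character/behaviour
 * bits reported by KVM_PPC_GET_CPU_CHAR into the capability levels used
 * by the spapr code: 0 means vulnerable/broken, 1 means a software
 * workaround is available, and higher values mean the hardware is fixed.
 */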
static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
{
    bool l1d_thread_priv_req = !kvmppc_power8_host();

    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
        return 2;
    } else if ((!l1d_thread_priv_req ||
                c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
               (c.character & c.character_mask
                & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
        return 1;
    }

    return 0;
}

static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
{
    if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
        return 2;
    } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
        return 1;
    }

    return 0;
}

static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
{
    if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
        return SPAPR_CAP_FIXED_CCD;
    } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
        return SPAPR_CAP_FIXED_IBS;
    }

    return 0;
}
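
/*
 * Query the host's speculation-control characteristics via
 * KVM_PPC_GET_CPU_CHAR, if available, and cache the parsed results.
 * If the extension is missing or the ioctl fails, the caps stay at
 * "broken".
 */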
static void kvmppc_get_cpu_characteristics(KVMState *s)
{
    struct kvm_ppc_cpu_char c;
    int ret;

    /* Assume broken */
    cap_ppc_safe_cache = 0;
    cap_ppc_safe_bounds_check = 0;
    cap_ppc_safe_indirect_branch = 0;

    ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
    if (!ret) {
        return;
    }
    ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
    if (ret < 0) {
        return;
    }

    cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
    cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
    cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
}

int kvmppc_get_cap_safe_cache(void)
{
    return cap_ppc_safe_cache;
}

int kvmppc_get_cap_safe_bounds_check(void)
{
    return cap_ppc_safe_bounds_check;
}

int kvmppc_get_cap_safe_indirect_branch(void)
{
    return cap_ppc_safe_indirect_branch;
}

bool kvmppc_has_cap_spapr_vfio(void)
{
    return cap_spapr_vfio;
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}

static int kvm_ppc_register_host_cpu_type(MachineState *ms)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    MachineClass *mc = MACHINE_GET_CLASS(ms);
    PowerPCCPUClass *pvr_pcc;
    ObjectClass *oc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);
    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
        /* override TCG default cpu type with 'host' cpu model */
        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
    }

    oc = object_class_by_name(type_info.name);
    g_assert(oc);

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
            if (suffix) {
                *suffix = 0;
            }
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}
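
/*
 * Open a file descriptor through which the guest hash page table can be
 * read (e.g. for migration) or written (for restore), starting at the
 * given HPTE index.  Returns the fd, or a negative errno with *errp set
 * if the capability or the ioctl is unavailable.
 */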
int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = index,
    };
    int ret;

    if (!cap_htab_fd) {
        error_setg(errp, "KVM version doesn't support %s the HPT",
                   write ? "writing" : "reading");
        return -ENOTSUP;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
    if (ret < 0) {
        error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
                   write ? "writing" : "reading", write ? "to" : "from",
                   strerror(errno));
        return -errno;
    }

    return ret;
}
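
/*
 * Stream HPT chunks from the KVM HTAB fd into the migration stream.
 * Returns 1 once the whole table has been read (read() hit EOF), 0 if
 * the max_ns time budget expired first, or a negative value on error.
 */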
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                     HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}
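
/*
 * Read n HPTEs starting at ptex from the kernel into hptes[],
 * zero-filling any ranges the kernel reports as invalid.
 */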
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    int fd, rc;
    int i;

    fd = kvmppc_get_htab_fd(false, ptex, &error_abort);

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid, valid = hdr->n_valid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
            }

            if (n - i < valid) {
                valid = n - i;
            }
            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
            i += valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }
    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}

void kvmppc_check_papr_resize_hpt(Error **errp)
{
    if (!kvm_enabled()) {
        return; /* No KVM, we're good */
    }

    if (cap_resize_hpt) {
        return; /* Kernel has explicit support, we're good */
    }

    /* Otherwise fallback on looking for PR KVM */
    if (kvmppc_is_pr(kvm_state)) {
        return;
    }

    error_setg(errp,
               "Hash page table resizing not available with this KVM version");
}
int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
}

int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
{
    CPUState *cs = CPU(cpu);
    struct kvm_ppc_resize_hpt rhpt = {
        .flags = flags,
        .shift = shift,
    };

    if (!cap_resize_hpt) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
}

/*
 * This is a helper function to detect a post-migration scenario in
 * which a guest, running as KVM-HV, freezes in cpu_post_load because
 * the guest kernel can't handle a PVR value other than the actual host
 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
 *
 * If we don't have cap_ppc_pvr_compat and we're not running in PR
 * (so, we're HV), return true.  The workaround itself is done in
 * cpu_post_load.
 *
 * The order here is important: we'll only check for KVM PR as a
 * fallback if the guest kernel can't handle the situation itself.
 * We want to avoid, as much as possible, querying the running KVM type
 * from QEMU.
 */
bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);

    if (!kvm_enabled()) {
        return false;
    }

    if (cap_ppc_pvr_compat) {
        return false;
    }

    return !kvmppc_is_pr(cs->kvm_state);
}