target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qapi/error.h"
  26 #include "qemu/error-report.h"
  27 #include "cpu.h"
  28 #include "cpu-models.h"
  29 #include "qemu/timer.h"
  30 #include "sysemu/sysemu.h"
  31 #include "sysemu/hw_accel.h"
  32 #include "kvm_ppc.h"
  33 #include "sysemu/cpus.h"
  34 #include "sysemu/device_tree.h"
  35 #include "mmu-hash64.h"
  36
  37 #include "hw/sysbus.h"
  38 #include "hw/ppc/spapr.h"
  39 #include "hw/ppc/spapr_vio.h"
  40 #include "hw/ppc/spapr_cpu_core.h"
  41 #include "hw/ppc/ppc.h"
  42 #include "sysemu/watchdog.h"
  43 #include "trace.h"
  44 #include "exec/gdbstub.h"
  45 #include "exec/memattrs.h"
  46 #include "exec/ram_addr.h"
  47 #include "sysemu/hostmem.h"
  48 #include "qemu/cutils.h"
  49 #include "qemu/mmap-alloc.h"
  50 #include "elf.h"
  51 #include "sysemu/kvm_int.h"
  52
  53 //#define DEBUG_KVM
  54
  55 #ifdef DEBUG_KVM
  56 #define DPRINTF(fmt, ...) \
  57     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  58 #else
  59 #define DPRINTF(fmt, ...) \
  60     do { } while (0)
  61 #endif
  62
  63 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  64
  65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  66     KVM_CAP_LAST_INFO
  67 };
  68
  69 static int cap_interrupt_unset = false;
  70 static int cap_interrupt_level = false;
  71 static int cap_segstate;
  72 static int cap_booke_sregs;
  73 static int cap_ppc_smt;
  74 static int cap_ppc_smt_possible;
  75 static int cap_spapr_tce;
  76 static int cap_spapr_tce_64;
  77 static int cap_spapr_multitce;
  78 static int cap_spapr_vfio;
  79 static int cap_hior;
  80 static int cap_one_reg;
  81 static int cap_epr;
  82 static int cap_ppc_watchdog;
  83 static int cap_papr;
  84 static int cap_htab_fd;
  85 static int cap_fixup_hcalls;
  86 static int cap_htm;             /* Hardware transactional memory support */
  87 static int cap_mmu_radix;
  88 static int cap_mmu_hash_v3;
  89 static int cap_resize_hpt;
  90 static int cap_ppc_pvr_compat;
  91 static int cap_ppc_safe_cache;
  92 static int cap_ppc_safe_bounds_check;
  93 static int cap_ppc_safe_indirect_branch;
  94
  95 static uint32_t debug_inst_opcode;
  96
  97 /* XXX We have a race condition where we actually have a level triggered
  98  *     interrupt, but the infrastructure can't expose that yet, so the guest
  99  *     takes but ignores it, goes to sleep and never gets notified that there's
 100  *     still an interrupt pending.
 101  *
 102  *     As a quick workaround, let's just wake up again 20 ms after we injected
 103  *     an interrupt. That way we can assure that we're always reinjecting
 104  *     interrupts in case the guest swallowed them.
 105  */
 106 static QEMUTimer *idle_timer;
 107
 108 static void kvm_kick_cpu(void *opaque)
 109 {
 110     PowerPCCPU *cpu = opaque;
 111
 112     qemu_cpu_kick(CPU(cpu));
 113 }
 114
 115 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 116  * should only be used for fallback tests - generally we should use
 117  * explicit capabilities for the features we want, rather than
 118  * assuming what is/isn't available depending on the KVM variant. */
 119 static bool kvmppc_is_pr(KVMState *ks)
 120 {
 121     /* Assume KVM-PR if the GET_PVINFO capability is available */
 122     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 123 }
 124
 125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
 126 static void kvmppc_get_cpu_characteristics(KVMState *s);
 127
 128 int kvm_arch_init(MachineState *ms, KVMState *s)
 129 {
 130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 134     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 135     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 136     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 137     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 138     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
 139     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 140     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 141     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 142     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 143     /* Note: we don't set cap_papr here, because this capability is
 144      * only activated after this by kvmppc_set_papr() */
 145     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 146     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 147     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 148     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 149     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 150     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 151     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 152     kvmppc_get_cpu_characteristics(s);
 153     /*
 154      * Note: setting it to false because there is not such capability
 155      * in KVM at this moment.
 156      *
 157      * TODO: call kvm_vm_check_extension() with the right capability
 158      * after the kernel starts implementing it.*/
 159     cap_ppc_pvr_compat = false;
 160
 161     if (!cap_interrupt_level) {
 162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 163                         "VM to stall at times!\n");
 164     }
 165
 166     kvm_ppc_register_host_cpu_type(ms);
 167
 168     return 0;
 169 }
 170
 171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 172 {
 173     return 0;
 174 }
 175
 176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 177 {
 178     CPUPPCState *cenv = &cpu->env;
 179     CPUState *cs = CPU(cpu);
 180     struct kvm_sregs sregs;
 181     int ret;
 182
 183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 184         /* What we're really trying to say is "if we're on BookE, we use
 185            the native PVR for now". This is the only sane way to check
 186            it though, so we potentially confuse users that they can run
 187            BookE guests on BookS. Let's hope nobody dares enough :) */
 188         return 0;
 189     } else {
 190         if (!cap_segstate) {
 191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 192             return -ENOSYS;
 193         }
 194     }
 195
 196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 197     if (ret) {
 198         return ret;
 199     }
 200
 201     sregs.pvr = cenv->spr[SPR_PVR];
 202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 203 }
 204
 205 /* Set up a shared TLB array with KVM */
 206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 207 {
 208     CPUPPCState *env = &cpu->env;
 209     CPUState *cs = CPU(cpu);
 210     struct kvm_book3e_206_tlb_params params = {};
 211     struct kvm_config_tlb cfg = {};
 212     unsigned int entries = 0;
 213     int ret, i;
 214
 215     if (!kvm_enabled() ||
 216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 217         return 0;
 218     }
 219
 220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 221
 222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 225         entries += params.tlb_sizes[i];
 226     }
 227
 228     assert(entries == env->nb_tlb);
 229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 230
 231     env->tlb_dirty = true;
 232
 233     cfg.array = (uintptr_t)env->tlb.tlbm;
 234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 235     cfg.params = (uintptr_t)&params;
 236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 237
 238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 239     if (ret < 0) {
 240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 241                 __func__, strerror(-ret));
 242         return ret;
 243     }
 244
 245     env->kvm_sw_tlb = true;
 246     return 0;
 247 }
 248
 249
 250 #if defined(TARGET_PPC64)
 251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 252                                        struct kvm_ppc_smmu_info *info)
 253 {
 254     CPUPPCState *env = &cpu->env;
 255     CPUState *cs = CPU(cpu);
 256
 257     memset(info, 0, sizeof(*info));
 258
 259     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 260      * need to "guess" what the supported page sizes are.
 261      *
 262      * For that to work we make a few assumptions:
 263      *
 264      * - Check whether we are running "PR" KVM which only supports 4K
 265      *   and 16M pages, but supports them regardless of the backing
 266      *   store characteritics. We also don't support 1T segments.
 267      *
 268      *   This is safe as if HV KVM ever supports that capability or PR
 269      *   KVM grows supports for more page/segment sizes, those versions
 270      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 271      *   will not hit this fallback
 272      *
 273      * - Else we are running HV KVM. This means we only support page
 274      *   sizes that fit in the backing store. Additionally we only
 275      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 276      *   P7 encodings for the SLB and hash table. Here too, we assume
 277      *   support for any newer processor will mean a kernel that
 278      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 279      *   this fallback.
 280      */
 281     if (kvmppc_is_pr(cs->kvm_state)) {
 282         /* No flags */
 283         info->flags = 0;
 284         info->slb_size = 64;
 285
 286         /* Standard 4k base page size segment */
 287         info->sps[0].page_shift = 12;
 288         info->sps[0].slb_enc = 0;
 289         info->sps[0].enc[0].page_shift = 12;
 290         info->sps[0].enc[0].pte_enc = 0;
 291
 292         /* Standard 16M large page size segment */
 293         info->sps[1].page_shift = 24;
 294         info->sps[1].slb_enc = SLB_VSID_L;
 295         info->sps[1].enc[0].page_shift = 24;
 296         info->sps[1].enc[0].pte_enc = 0;
 297     } else {
 298         int i = 0;
 299
 300         /* HV KVM has backing store size restrictions */
 301         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 302
 303         if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
 304             info->flags |= KVM_PPC_1T_SEGMENTS;
 305         }
 306
 307         if (env->mmu_model == POWERPC_MMU_2_06 ||
 308             env->mmu_model == POWERPC_MMU_2_07) {
 309             info->slb_size = 32;
 310         } else {
 311             info->slb_size = 64;
 312         }
 313
 314         /* Standard 4k base page size segment */
 315         info->sps[i].page_shift = 12;
 316         info->sps[i].slb_enc = 0;
 317         info->sps[i].enc[0].page_shift = 12;
 318         info->sps[i].enc[0].pte_enc = 0;
 319         i++;
 320
 321         /* 64K on MMU 2.06 and later */
 322         if (env->mmu_model == POWERPC_MMU_2_06 ||
 323             env->mmu_model == POWERPC_MMU_2_07) {
 324             info->sps[i].page_shift = 16;
 325             info->sps[i].slb_enc = 0x110;
 326             info->sps[i].enc[0].page_shift = 16;
 327             info->sps[i].enc[0].pte_enc = 1;
 328             i++;
 329         }
 330
 331         /* Standard 16M large page size segment */
 332         info->sps[i].page_shift = 24;
 333         info->sps[i].slb_enc = SLB_VSID_L;
 334         info->sps[i].enc[0].page_shift = 24;
 335         info->sps[i].enc[0].pte_enc = 0;
 336     }
 337 }
 338
 339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 340 {
 341     CPUState *cs = CPU(cpu);
 342     int ret;
 343
 344     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 345         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 346         if (ret == 0) {
 347             return;
 348         }
 349     }
 350
 351     kvm_get_fallback_smmu_info(cpu, info);
 352 }
 353
 354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 355 {
 356     KVMState *s = KVM_STATE(current_machine->accelerator);
 357     struct ppc_radix_page_info *radix_page_info;
 358     struct kvm_ppc_rmmu_info rmmu_info;
 359     int i;
 360
 361     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 362         return NULL;
 363     }
 364     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 365         return NULL;
 366     }
 367     radix_page_info = g_malloc0(sizeof(*radix_page_info));
 368     radix_page_info->count = 0;
 369     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 370         if (rmmu_info.ap_encodings[i]) {
 371             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 372             radix_page_info->count++;
 373         }
 374     }
 375     return radix_page_info;
 376 }
 377
 378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 379                                      bool radix, bool gtse,
 380                                      uint64_t proc_tbl)
 381 {
 382     CPUState *cs = CPU(cpu);
 383     int ret;
 384     uint64_t flags = 0;
 385     struct kvm_ppc_mmuv3_cfg cfg = {
 386         .process_table = proc_tbl,
 387     };
 388
 389     if (radix) {
 390         flags |= KVM_PPC_MMUV3_RADIX;
 391     }
 392     if (gtse) {
 393         flags |= KVM_PPC_MMUV3_GTSE;
 394     }
 395     cfg.flags = flags;
 396     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 397     switch (ret) {
 398     case 0:
 399         return H_SUCCESS;
 400     case -EINVAL:
 401         return H_PARAMETER;
 402     case -ENODEV:
 403         return H_NOT_AVAILABLE;
 404     default:
 405         return H_HARDWARE;
 406     }
 407 }
 408
 409 bool kvmppc_hpt_needs_host_contiguous_pages(void)
 410 {
 411     PowerPCCPU *cpu = POWERPC_CPU(first_cpu);
 412     static struct kvm_ppc_smmu_info smmu_info;
 413
 414     if (!kvm_enabled()) {
 415         return false;
 416     }
 417
 418     kvm_get_smmu_info(cpu, &smmu_info);
 419     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
 420 }
 421
 422 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
 423 {
 424     struct kvm_ppc_smmu_info smmu_info;
 425     int iq, ik, jq, jk;
 426
 427     /* For now, we only have anything to check on hash64 MMUs */
 428     if (!cpu->hash64_opts || !kvm_enabled()) {
 429         return;
 430     }
 431
 432     kvm_get_smmu_info(cpu, &smmu_info);
 433
 434     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
 435         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 436         error_setg(errp,
 437                    "KVM does not support 1TiB segments which guest expects");
 438         return;
 439     }
 440
 441     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
 442         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
 443                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
 444         return;
 445     }
 446
 447     /*
 448      * Verify that every pagesize supported by the cpu model is
 449      * supported by KVM with the same encodings
 450      */
 451     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
 452         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
 453         struct kvm_ppc_one_seg_page_size *ksps;
 454
 455         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
 456             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
 457                 break;
 458             }
 459         }
 460         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
 461             error_setg(errp, "KVM doesn't support for base page shift %u",
 462                        qsps->page_shift);
 463             return;
 464         }
 465
 466         ksps = &smmu_info.sps[ik];
 467         if (ksps->slb_enc != qsps->slb_enc) {
 468             error_setg(errp,
 469 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
 470                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
 471             return;
 472         }
 473
 474         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
 475             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
 476                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
 477                     break;
 478                 }
 479             }
 480
 481             if (jk >= ARRAY_SIZE(ksps->enc)) {
 482                 error_setg(errp, "KVM doesn't support page shift %u/%u",
 483                            qsps->enc[jq].page_shift, qsps->page_shift);
 484                 return;
 485             }
 486             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
 487                 error_setg(errp,
 488 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
 489                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
 490                            qsps->page_shift, qsps->enc[jq].pte_enc);
 491                 return;
 492             }
 493         }
 494     }
 495
 496     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
 497         /* Mostly what guest pagesizes we can use are related to the
 498          * host pages used to map guest RAM, which is handled in the
 499          * platform code. Cache-Inhibited largepages (64k) however are
 500          * used for I/O, so if they're mapped to the host at all it
 501          * will be a normal mapping, not a special hugepage one used
 502          * for RAM. */
 503         if (getpagesize() < 0x10000) {
 504             error_setg(errp,
 505                        "KVM can't supply 64kiB CI pages, which guest expects");
 506         }
 507     }
 508 }
#endif /* defined (TARGET_PPC64) */
 510
 511 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 512 {
 513     return POWERPC_CPU(cpu)->vcpu_id;
 514 }
 515
 516 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 517  * book3s supports only 1 watchpoint, so array size
 518  * of 4 is sufficient for now.
 519  */
 520 #define MAX_HW_BKPTS 4
 521
 522 static struct HWBreakpoint {
 523     target_ulong addr;
 524     int type;
 525 } hw_debug_points[MAX_HW_BKPTS];
 526
 527 static CPUWatchpoint hw_watchpoint;
 528
 529 /* Default there is no breakpoint and watchpoint supported */
 530 static int max_hw_breakpoint;
 531 static int max_hw_watchpoint;
 532 static int nb_hw_breakpoint;
 533 static int nb_hw_watchpoint;
 534
 535 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 536 {
 537     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 538         max_hw_breakpoint = 2;
 539         max_hw_watchpoint = 2;
 540     }
 541
 542     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 543         fprintf(stderr, "Error initializing h/w breakpoints\n");
 544         return;
 545     }
 546 }
 547
 548 int kvm_arch_init_vcpu(CPUState *cs)
 549 {
 550     PowerPCCPU *cpu = POWERPC_CPU(cs);
 551     CPUPPCState *cenv = &cpu->env;
 552     int ret;
 553
 554     /* Synchronize sregs with kvm */
 555     ret = kvm_arch_sync_sregs(cpu);
 556     if (ret) {
 557         if (ret == -EINVAL) {
 558             error_report("Register sync failed... If you're using kvm-hv.ko,"
 559                          " only \"-cpu host\" is possible");
 560         }
 561         return ret;
 562     }
 563
 564     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 565
 566     switch (cenv->mmu_model) {
 567     case POWERPC_MMU_BOOKE206:
 568         /* This target supports access to KVM's guest TLB */
 569         ret = kvm_booke206_tlb_init(cpu);
 570         break;
 571     case POWERPC_MMU_2_07:
 572         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 573             /* KVM-HV has transactional memory on POWER8 also without the
 574              * KVM_CAP_PPC_HTM extension, so enable it here instead as
 575              * long as it's availble to userspace on the host. */
 576             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 577                 cap_htm = true;
 578             }
 579         }
 580         break;
 581     default:
 582         break;
 583     }
 584
 585     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 586     kvmppc_hw_debug_points_init(cenv);
 587
 588     return ret;
 589 }
 590
 591 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 592 {
 593     CPUPPCState *env = &cpu->env;
 594     CPUState *cs = CPU(cpu);
 595     struct kvm_dirty_tlb dirty_tlb;
 596     unsigned char *bitmap;
 597     int ret;
 598
 599     if (!env->kvm_sw_tlb) {
 600         return;
 601     }
 602
 603     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 604     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 605
 606     dirty_tlb.bitmap = (uintptr_t)bitmap;
 607     dirty_tlb.num_dirty = env->nb_tlb;
 608
 609     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 610     if (ret) {
 611         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 612                 __func__, strerror(-ret));
 613     }
 614
 615     g_free(bitmap);
 616 }
 617
 618 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 619 {
 620     PowerPCCPU *cpu = POWERPC_CPU(cs);
 621     CPUPPCState *env = &cpu->env;
 622     union {
 623         uint32_t u32;
 624         uint64_t u64;
 625     } val;
 626     struct kvm_one_reg reg = {
 627         .id = id,
 628         .addr = (uintptr_t) &val,
 629     };
 630     int ret;
 631
 632     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 633     if (ret != 0) {
 634         trace_kvm_failed_spr_get(spr, strerror(errno));
 635     } else {
 636         switch (id & KVM_REG_SIZE_MASK) {
 637         case KVM_REG_SIZE_U32:
 638             env->spr[spr] = val.u32;
 639             break;
 640
 641         case KVM_REG_SIZE_U64:
 642             env->spr[spr] = val.u64;
 643             break;
 644
 645         default:
 646             /* Don't handle this size yet */
 647             abort();
 648         }
 649     }
 650 }
 651
 652 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 653 {
 654     PowerPCCPU *cpu = POWERPC_CPU(cs);
 655     CPUPPCState *env = &cpu->env;
 656     union {
 657         uint32_t u32;
 658         uint64_t u64;
 659     } val;
 660     struct kvm_one_reg reg = {
 661         .id = id,
 662         .addr = (uintptr_t) &val,
 663     };
 664     int ret;
 665
 666     switch (id & KVM_REG_SIZE_MASK) {
 667     case KVM_REG_SIZE_U32:
 668         val.u32 = env->spr[spr];
 669         break;
 670
 671     case KVM_REG_SIZE_U64:
 672         val.u64 = env->spr[spr];
 673         break;
 674
 675     default:
 676         /* Don't handle this size yet */
 677         abort();
 678     }
 679
 680     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 681     if (ret != 0) {
 682         trace_kvm_failed_spr_set(spr, strerror(errno));
 683     }
 684 }
 685
 686 static int kvm_put_fp(CPUState *cs)
 687 {
 688     PowerPCCPU *cpu = POWERPC_CPU(cs);
 689     CPUPPCState *env = &cpu->env;
 690     struct kvm_one_reg reg;
 691     int i;
 692     int ret;
 693
 694     if (env->insns_flags & PPC_FLOAT) {
 695         uint64_t fpscr = env->fpscr;
 696         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 697
 698         reg.id = KVM_REG_PPC_FPSCR;
 699         reg.addr = (uintptr_t)&fpscr;
 700         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 701         if (ret < 0) {
 702             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 703             return ret;
 704         }
 705
 706         for (i = 0; i < 32; i++) {
 707             uint64_t vsr[2];
 708
 709 #ifdef HOST_WORDS_BIGENDIAN
 710             vsr[0] = float64_val(env->fpr[i]);
 711             vsr[1] = env->vsr[i];
 712 #else
 713             vsr[0] = env->vsr[i];
 714             vsr[1] = float64_val(env->fpr[i]);
 715 #endif
 716             reg.addr = (uintptr_t) &vsr;
 717             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 718
 719             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 720             if (ret < 0) {
 721                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 722                         i, strerror(errno));
 723                 return ret;
 724             }
 725         }
 726     }
 727
 728     if (env->insns_flags & PPC_ALTIVEC) {
 729         reg.id = KVM_REG_PPC_VSCR;
 730         reg.addr = (uintptr_t)&env->vscr;
 731         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 732         if (ret < 0) {
 733             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 734             return ret;
 735         }
 736
 737         for (i = 0; i < 32; i++) {
 738             reg.id = KVM_REG_PPC_VR(i);
 739             reg.addr = (uintptr_t)&env->avr[i];
 740             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 741             if (ret < 0) {
 742                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 743                 return ret;
 744             }
 745         }
 746     }
 747
 748     return 0;
 749 }
 750
 751 static int kvm_get_fp(CPUState *cs)
 752 {
 753     PowerPCCPU *cpu = POWERPC_CPU(cs);
 754     CPUPPCState *env = &cpu->env;
 755     struct kvm_one_reg reg;
 756     int i;
 757     int ret;
 758
 759     if (env->insns_flags & PPC_FLOAT) {
 760         uint64_t fpscr;
 761         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 762
 763         reg.id = KVM_REG_PPC_FPSCR;
 764         reg.addr = (uintptr_t)&fpscr;
 765         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 766         if (ret < 0) {
 767             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 768             return ret;
 769         } else {
 770             env->fpscr = fpscr;
 771         }
 772
 773         for (i = 0; i < 32; i++) {
 774             uint64_t vsr[2];
 775
 776             reg.addr = (uintptr_t) &vsr;
 777             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 778
 779             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 780             if (ret < 0) {
 781                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 782                         vsx ? "VSR" : "FPR", i, strerror(errno));
 783                 return ret;
 784             } else {
 785 #ifdef HOST_WORDS_BIGENDIAN
 786                 env->fpr[i] = vsr[0];
 787                 if (vsx) {
 788                     env->vsr[i] = vsr[1];
 789                 }
 790 #else
 791                 env->fpr[i] = vsr[1];
 792                 if (vsx) {
 793                     env->vsr[i] = vsr[0];
 794                 }
 795 #endif
 796             }
 797         }
 798     }
 799
 800     if (env->insns_flags & PPC_ALTIVEC) {
 801         reg.id = KVM_REG_PPC_VSCR;
 802         reg.addr = (uintptr_t)&env->vscr;
 803         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 804         if (ret < 0) {
 805             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 806             return ret;
 807         }
 808
 809         for (i = 0; i < 32; i++) {
 810             reg.id = KVM_REG_PPC_VR(i);
 811             reg.addr = (uintptr_t)&env->avr[i];
 812             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 813             if (ret < 0) {
 814                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 815                         i, strerror(errno));
 816                 return ret;
 817             }
 818         }
 819     }
 820
 821     return 0;
 822 }
 823
 824 #if defined(TARGET_PPC64)
 825 static int kvm_get_vpa(CPUState *cs)
 826 {
 827     PowerPCCPU *cpu = POWERPC_CPU(cs);
 828     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 829     struct kvm_one_reg reg;
 830     int ret;
 831
 832     reg.id = KVM_REG_PPC_VPA_ADDR;
 833     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 834     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 835     if (ret < 0) {
 836         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 837         return ret;
 838     }
 839
 840     assert((uintptr_t)&spapr_cpu->slb_shadow_size
 841            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 842     reg.id = KVM_REG_PPC_VPA_SLB;
 843     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 844     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 845     if (ret < 0) {
 846         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 847                 strerror(errno));
 848         return ret;
 849     }
 850
 851     assert((uintptr_t)&spapr_cpu->dtl_size
 852            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 853     reg.id = KVM_REG_PPC_VPA_DTL;
 854     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 855     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 856     if (ret < 0) {
 857         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 858                 strerror(errno));
 859         return ret;
 860     }
 861
 862     return 0;
 863 }
 864
 865 static int kvm_put_vpa(CPUState *cs)
 866 {
 867     PowerPCCPU *cpu = POWERPC_CPU(cs);
 868     sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
 869     struct kvm_one_reg reg;
 870     int ret;
 871
 872     /* SLB shadow or DTL can't be registered unless a master VPA is
 873      * registered.  That means when restoring state, if a VPA *is*
 874      * registered, we need to set that up first.  If not, we need to
 875      * deregister the others before deregistering the master VPA */
 876     assert(spapr_cpu->vpa_addr
 877            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
 878
 879     if (spapr_cpu->vpa_addr) {
 880         reg.id = KVM_REG_PPC_VPA_ADDR;
 881         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 882         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 883         if (ret < 0) {
 884             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 885             return ret;
 886         }
 887     }
 888
 889     assert((uintptr_t)&spapr_cpu->slb_shadow_size
 890            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 891     reg.id = KVM_REG_PPC_VPA_SLB;
 892     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 893     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 894     if (ret < 0) {
 895         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 896         return ret;
 897     }
 898
 899     assert((uintptr_t)&spapr_cpu->dtl_size
 900            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 901     reg.id = KVM_REG_PPC_VPA_DTL;
 902     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 903     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 904     if (ret < 0) {
 905         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 906                 strerror(errno));
 907         return ret;
 908     }
 909
 910     if (!spapr_cpu->vpa_addr) {
 911         reg.id = KVM_REG_PPC_VPA_ADDR;
 912         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 913         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 914         if (ret < 0) {
 915             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 916             return ret;
 917         }
 918     }
 919
 920     return 0;
 921 }
 922 #endif /* TARGET_PPC64 */
 923
 924 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 925 {
 926     CPUPPCState *env = &cpu->env;
 927     struct kvm_sregs sregs;
 928     int i;
 929
 930     sregs.pvr = env->spr[SPR_PVR];
 931
 932     if (cpu->vhyp) {
 933         PPCVirtualHypervisorClass *vhc =
 934             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 935         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 936     } else {
 937         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 938     }
 939
 940     /* Sync SLB */
 941 #ifdef TARGET_PPC64
 942     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 943         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 944         if (env->slb[i].esid & SLB_ESID_V) {
 945             sregs.u.s.ppc64.slb[i].slbe |= i;
 946         }
 947         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 948     }
 949 #endif
 950
 951     /* Sync SRs */
 952     for (i = 0; i < 16; i++) {
 953         sregs.u.s.ppc32.sr[i] = env->sr[i];
 954     }
 955
 956     /* Sync BATs */
 957     for (i = 0; i < 8; i++) {
 958         /* Beware. We have to swap upper and lower bits here */
 959         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 960             | env->DBAT[1][i];
 961         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 962             | env->IBAT[1][i];
 963     }
 964
 965     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 966 }
 967
/*
 * Copy QEMU's view of the vCPU register state into KVM.
 *
 * @cs:    the vCPU to update
 * @level: compared against KVM_PUT_RESET_STATE to decide whether the
 *         Book3S sregs and HIOR are written as well
 *
 * Returns a negative value if KVM_GET_REGS, KVM_SET_REGS or
 * kvmppc_put_books_sregs() fails; otherwise the last value stored in
 * 'ret' by one of those calls.
 */
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    /* Start from KVM's current register block; the fields assigned below
     * are then overwritten with QEMU's values. */
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr  = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0;i < 32; i++)
        regs.gpr[i] = env->gpr[i];

    /* Pack the eight 4-bit CR fields into one word, with crf[0] in the
     * most significant nibble. */
    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0)
        return ret;

    kvm_put_fp(cs);

    /* Push the software TLB only when it has been marked dirty */
    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        ret = kvmppc_put_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* NOTE: this declaration shadows the function-level 'i' */
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            /* An id of 0 means this SPR has no ONE_REG id registered */
            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        /* The TM_* registers are written only when msr_ts is non-zero */
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        /* A VPA failure is only traced (in DEBUG_KVM builds); it does not
         * change the return value. */
        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}
1078
1079 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1080 {
1081      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1082 }
1083
/*
 * Fetch the BookE special registers from KVM with KVM_GET_SREGS and copy
 * them into env->spr[].  Each group of registers is copied only when the
 * corresponding bit is set in sregs.u.e.features (or, for the Freescale
 * block, when impl_id matches).
 *
 * Returns the negative ioctl result on failure, 0 otherwise.
 */
static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    if (sregs.u.e.features & KVM_SREGS_E_BASE) {
        env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
        env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
        env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
        env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
        env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
        env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
        env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
        env->spr[SPR_DECR] = sregs.u.e.dec;
        /* The 64-bit 'tb' value is split: low word to TBL, high to TBU */
        env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
        env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
        env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
        env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
        env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
        env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
        env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
        /* IVPR must be stored before the IVOR block below, because
         * kvm_sync_excp() reads SPR_BOOKE_IVPR. */
        env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_64) {
        env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
        env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
    }

    /* For every IVOR: store the offset SPR, then refresh the matching
     * entry of env->excp_vectors[]. */
    if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
        env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
        kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
        env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
        kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
        env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
        kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
        env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
        kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
        env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
        kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
        env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
        kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
        env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
        kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
        env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
        kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
        env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
        kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
        env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
        kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
        env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
        kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
        env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
        kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
        env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
        kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
        env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
        kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
        env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
        kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
        env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
        kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);

        /* The ivor_high[] entries are copied only when the extra feature
         * bit tested in each block is set too. */
        if (sregs.u.e.features & KVM_SREGS_E_SPE) {
            env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
            kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
            env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
            kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
            env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
            kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PM) {
            env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
            kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
        }

        if (sregs.u.e.features & KVM_SREGS_E_PC) {
            env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
            kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
            env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
            kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
        }
    }

    if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
        env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
        env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
        env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
        /* mas7_3 is split: low 32 bits to MAS3, high 32 bits to MAS7 */
        env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
        env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
        env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
        env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
        env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
        env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
        env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
    }

    if (sregs.u.e.features & KVM_SREGS_EXP) {
        env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
    }

    if (sregs.u.e.features & KVM_SREGS_E_PD) {
        env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
        env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
    }

    /* Freescale implementation-specific registers */
    if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
        env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
        env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
        env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;

        if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
            env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
            env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
        }
    }

    return 0;
}
1216
1217 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1218 {
1219     CPUPPCState *env = &cpu->env;
1220     struct kvm_sregs sregs;
1221     int ret;
1222     int i;
1223
1224     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1225     if (ret < 0) {
1226         return ret;
1227     }
1228
1229     if (!cpu->vhyp) {
1230         ppc_store_sdr1(env, sregs.u.s.sdr1);
1231     }
1232
1233     /* Sync SLB */
1234 #ifdef TARGET_PPC64
1235     /*
1236      * The packed SLB array we get from KVM_GET_SREGS only contains
1237      * information about valid entries. So we flush our internal copy
1238      * to get rid of stale ones, then put all valid SLB entries back
1239      * in.
1240      */
1241     memset(env->slb, 0, sizeof(env->slb));
1242     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1243         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1244         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1245         /*
1246          * Only restore valid entries
1247          */
1248         if (rb & SLB_ESID_V) {
1249             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1250         }
1251     }
1252 #endif
1253
1254     /* Sync SRs */
1255     for (i = 0; i < 16; i++) {
1256         env->sr[i] = sregs.u.s.ppc32.sr[i];
1257     }
1258
1259     /* Sync BATs */
1260     for (i = 0; i < 8; i++) {
1261         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1262         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1263         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1264         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1265     }
1266
1267     return 0;
1268 }
1269
/*
 * Copy the vCPU register state held by KVM into QEMU's CPU state.
 *
 * Returns a negative value if KVM_GET_REGS or one of the sregs fetches
 * fails, 0 otherwise.  Failures of the ONE_REG based reads are not
 * propagated.
 */
int kvm_arch_get_registers(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    uint32_t cr;
    int i, ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0)
        return ret;

    /* Unpack CR one nibble at a time, starting with the least
     * significant nibble, which belongs to crf[7]. */
    cr = regs.cr;
    for (i = 7; i >= 0; i--) {
        env->crf[i] = cr & 15;
        cr >>= 4;
    }

    env->ctr = regs.ctr;
    env->lr = regs.lr;
    cpu_write_xer(env, regs.xer);
    env->msr = regs.msr;
    env->nip = regs.pc;

    env->spr[SPR_SRR0] = regs.srr0;
    env->spr[SPR_SRR1] = regs.srr1;

    env->spr[SPR_SPRG0] = regs.sprg0;
    env->spr[SPR_SPRG1] = regs.sprg1;
    env->spr[SPR_SPRG2] = regs.sprg2;
    env->spr[SPR_SPRG3] = regs.sprg3;
    env->spr[SPR_SPRG4] = regs.sprg4;
    env->spr[SPR_SPRG5] = regs.sprg5;
    env->spr[SPR_SPRG6] = regs.sprg6;
    env->spr[SPR_SPRG7] = regs.sprg7;

    env->spr[SPR_BOOKE_PID] = regs.pid;

    for (i = 0;i < 32; i++)
        env->gpr[i] = regs.gpr[i];

    kvm_get_fp(cs);

    if (cap_booke_sregs) {
        ret = kvmppc_get_booke_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_segstate) {
        ret = kvmppc_get_books_sregs(cpu);
        if (ret < 0) {
            return ret;
        }
    }

    if (cap_hior) {
        kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        /* NOTE: this declaration shadows the function-level 'i' */
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            /* An id of 0 means this SPR has no ONE_REG id registered */
            if (id != 0) {
                kvm_get_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        /* The TM_* registers are read only when msr_ts is non-zero */
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        /* A VPA failure is only traced (in DEBUG_KVM builds); it does not
         * change the return value. */
        if (cap_papr) {
            if (kvm_get_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to get VPA information from KVM\n");
            }
        }

        kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif
    }

    return 0;
}
1378
1379 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1380 {
1381     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1382
1383     if (irq != PPC_INTERRUPT_EXT) {
1384         return 0;
1385     }
1386
1387     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1388         return 0;
1389     }
1390
1391     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1392
1393     return 0;
1394 }
1395
/*
 * PPC_INPUT_INT names the input pin constant matching the target being
 * built (40x, 970 or 6xx).  kvm_arch_pre_run() uses it as a bit number
 * when testing env->irq_input_state.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1403
1404 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1405 {
1406     PowerPCCPU *cpu = POWERPC_CPU(cs);
1407     CPUPPCState *env = &cpu->env;
1408     int r;
1409     unsigned irq;
1410
1411     qemu_mutex_lock_iothread();
1412
1413     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1414      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1415     if (!cap_interrupt_level &&
1416         run->ready_for_interrupt_injection &&
1417         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1418         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1419     {
1420         /* For now KVM disregards the 'irq' argument. However, in the
1421          * future KVM could cache it in-kernel to avoid a heavyweight exit
1422          * when reading the UIC.
1423          */
1424         irq = KVM_INTERRUPT_SET;
1425
1426         DPRINTF("injected interrupt %d\n", irq);
1427         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1428         if (r < 0) {
1429             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1430         }
1431
1432         /* Always wake up soon in case the interrupt was level based */
1433         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1434                        (NANOSECONDS_PER_SECOND / 50));
1435     }
1436
1437     /* We don't know if there are more interrupts pending after this. However,
1438      * the guest will return to userspace in the course of handling this one
1439      * anyways, so we will get a chance to deliver the rest. */
1440
1441     qemu_mutex_unlock_iothread();
1442 }
1443
1444 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1445 {
1446     return MEMTXATTRS_UNSPECIFIED;
1447 }
1448
1449 int kvm_arch_process_async_events(CPUState *cs)
1450 {
1451     return cs->halted;
1452 }
1453
1454 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1455 {
1456     CPUState *cs = CPU(cpu);
1457     CPUPPCState *env = &cpu->env;
1458
1459     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1460         cs->halted = 1;
1461         cs->exception_index = EXCP_HLT;
1462     }
1463
1464     return 0;
1465 }
1466
1467 /* map dcr access to existing qemu dcr emulation */
1468 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1469 {
1470     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1471         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1472
1473     return 0;
1474 }
1475
1476 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1477 {
1478     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1479         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1480
1481     return 0;
1482 }
1483
1484 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1485 {
1486     /* Mixed endian case is not handled */
1487     uint32_t sc = debug_inst_opcode;
1488
1489     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1490                             sizeof(sc), 0) ||
1491         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1492         return -EINVAL;
1493     }
1494
1495     return 0;
1496 }
1497
1498 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1499 {
1500     uint32_t sc;
1501
1502     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1503         sc != debug_inst_opcode ||
1504         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1505                             sizeof(sc), 1)) {
1506         return -EINVAL;
1507     }
1508
1509     return 0;
1510 }
1511
1512 static int find_hw_breakpoint(target_ulong addr, int type)
1513 {
1514     int n;
1515
1516     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1517            <= ARRAY_SIZE(hw_debug_points));
1518
1519     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1520         if (hw_debug_points[n].addr == addr &&
1521              hw_debug_points[n].type == type) {
1522             return n;
1523         }
1524     }
1525
1526     return -1;
1527 }
1528
1529 static int find_hw_watchpoint(target_ulong addr, int *flag)
1530 {
1531     int n;
1532
1533     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1534     if (n >= 0) {
1535         *flag = BP_MEM_ACCESS;
1536         return n;
1537     }
1538
1539     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1540     if (n >= 0) {
1541         *flag = BP_MEM_WRITE;
1542         return n;
1543     }
1544
1545     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1546     if (n >= 0) {
1547         *flag = BP_MEM_READ;
1548         return n;
1549     }
1550
1551     return -1;
1552 }
1553
1554 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1555                                   target_ulong len, int type)
1556 {
1557     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1558         return -ENOBUFS;
1559     }
1560
1561     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1562     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1563
1564     switch (type) {
1565     case GDB_BREAKPOINT_HW:
1566         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1567             return -ENOBUFS;
1568         }
1569
1570         if (find_hw_breakpoint(addr, type) >= 0) {
1571             return -EEXIST;
1572         }
1573
1574         nb_hw_breakpoint++;
1575         break;
1576
1577     case GDB_WATCHPOINT_WRITE:
1578     case GDB_WATCHPOINT_READ:
1579     case GDB_WATCHPOINT_ACCESS:
1580         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1581             return -ENOBUFS;
1582         }
1583
1584         if (find_hw_breakpoint(addr, type) >= 0) {
1585             return -EEXIST;
1586         }
1587
1588         nb_hw_watchpoint++;
1589         break;
1590
1591     default:
1592         return -ENOSYS;
1593     }
1594
1595     return 0;
1596 }
1597
1598 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1599                                   target_ulong len, int type)
1600 {
1601     int n;
1602
1603     n = find_hw_breakpoint(addr, type);
1604     if (n < 0) {
1605         return -ENOENT;
1606     }
1607
1608     switch (type) {
1609     case GDB_BREAKPOINT_HW:
1610         nb_hw_breakpoint--;
1611         break;
1612
1613     case GDB_WATCHPOINT_WRITE:
1614     case GDB_WATCHPOINT_READ:
1615     case GDB_WATCHPOINT_ACCESS:
1616         nb_hw_watchpoint--;
1617         break;
1618
1619     default:
1620         return -ENOSYS;
1621     }
1622     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1623
1624     return 0;
1625 }
1626
1627 void kvm_arch_remove_all_hw_breakpoints(void)
1628 {
1629     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1630 }
1631
1632 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1633 {
1634     int n;
1635
1636     /* Software Breakpoint updates */
1637     if (kvm_sw_breakpoints_active(cs)) {
1638         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1639     }
1640
1641     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1642            <= ARRAY_SIZE(hw_debug_points));
1643     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1644
1645     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1646         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1647         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1648         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1649             switch (hw_debug_points[n].type) {
1650             case GDB_BREAKPOINT_HW:
1651                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1652                 break;
1653             case GDB_WATCHPOINT_WRITE:
1654                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1655                 break;
1656             case GDB_WATCHPOINT_READ:
1657                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1658                 break;
1659             case GDB_WATCHPOINT_ACCESS:
1660                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1661                                         KVMPPC_DEBUG_WATCH_READ;
1662                 break;
1663             default:
1664                 cpu_abort(cs, "Unsupported breakpoint type\n");
1665             }
1666             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1667         }
1668     }
1669 }
1670
1671 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1672 {
1673     CPUState *cs = CPU(cpu);
1674     CPUPPCState *env = &cpu->env;
1675     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1676     int handle = 0;
1677     int n;
1678     int flag = 0;
1679
1680     if (cs->singlestep_enabled) {
1681         handle = 1;
1682     } else if (arch_info->status) {
1683         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1684             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1685                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1686                 if (n >= 0) {
1687                     handle = 1;
1688                 }
1689             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1690                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1691                 n = find_hw_watchpoint(arch_info->address,  &flag);
1692                 if (n >= 0) {
1693                     handle = 1;
1694                     cs->watchpoint_hit = &hw_watchpoint;
1695                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1696                     hw_watchpoint.flags = flag;
1697                 }
1698             }
1699         }
1700     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1701         handle = 1;
1702     } else {
1703         /* QEMU is not able to handle debug exception, so inject
1704          * program exception to guest;
1705          * Yes program exception NOT debug exception !!
1706          * When QEMU is using debug resources then debug exception must
1707          * be always set. To achieve this we set MSR_DE and also set
1708          * MSRP_DEP so guest cannot change MSR_DE.
1709          * When emulating debug resource for guest we want guest
1710          * to control MSR_DE (enable/disable debug interrupt on need).
1711          * Supporting both configurations are NOT possible.
1712          * So the result is that we cannot share debug resources
1713          * between QEMU and Guest on BOOKE architecture.
1714          * In the current design QEMU gets the priority over guest,
1715          * this means that if QEMU is using debug resources then guest
1716          * cannot use them;
1717          * For software breakpoint QEMU uses a privileged instruction;
1718          * So there cannot be any reason that we are here for guest
1719          * set debug exception, only possibility is guest executed a
1720          * privileged / illegal instruction and that's why we are
1721          * injecting a program interrupt.
1722          */
1723
1724         cpu_synchronize_state(cs);
1725         /* env->nip is PC, so increment this by 4 to use
1726          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1727          */
1728         env->nip += 4;
1729         cs->exception_index = POWERPC_EXCP_PROGRAM;
1730         env->error_code = POWERPC_EXCP_INVAL;
1731         ppc_cpu_do_interrupt(cs);
1732     }
1733
1734     return handle;
1735 }
1736
1737 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1738 {
1739     PowerPCCPU *cpu = POWERPC_CPU(cs);
1740     CPUPPCState *env = &cpu->env;
1741     int ret;
1742
1743     qemu_mutex_lock_iothread();
1744
1745     switch (run->exit_reason) {
1746     case KVM_EXIT_DCR:
1747         if (run->dcr.is_write) {
1748             DPRINTF("handle dcr write\n");
1749             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1750         } else {
1751             DPRINTF("handle dcr read\n");
1752             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1753         }
1754         break;
1755     case KVM_EXIT_HLT:
1756         DPRINTF("handle halt\n");
1757         ret = kvmppc_handle_halt(cpu);
1758         break;
1759 #if defined(TARGET_PPC64)
1760     case KVM_EXIT_PAPR_HCALL:
1761         DPRINTF("handle PAPR hypercall\n");
1762         run->papr_hcall.ret = spapr_hypercall(cpu,
1763                                               run->papr_hcall.nr,
1764                                               run->papr_hcall.args);
1765         ret = 0;
1766         break;
1767 #endif
1768     case KVM_EXIT_EPR:
1769         DPRINTF("handle epr\n");
1770         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1771         ret = 0;
1772         break;
1773     case KVM_EXIT_WATCHDOG:
1774         DPRINTF("handle watchdog expiry\n");
1775         watchdog_perform_action();
1776         ret = 0;
1777         break;
1778
1779     case KVM_EXIT_DEBUG:
1780         DPRINTF("handle debug exception\n");
1781         if (kvm_handle_debug(cpu, run)) {
1782             ret = EXCP_DEBUG;
1783             break;
1784         }
1785         /* re-enter, this exception was guest-internal */
1786         ret = 0;
1787         break;
1788
1789     default:
1790         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1791         ret = -1;
1792         break;
1793     }
1794
1795     qemu_mutex_unlock_iothread();
1796     return ret;
1797 }
1798
1799 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1800 {
1801     CPUState *cs = CPU(cpu);
1802     uint32_t bits = tsr_bits;
1803     struct kvm_one_reg reg = {
1804         .id = KVM_REG_PPC_OR_TSR,
1805         .addr = (uintptr_t) &bits,
1806     };
1807
1808     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1809 }
1810
1811 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1812 {
1813
1814     CPUState *cs = CPU(cpu);
1815     uint32_t bits = tsr_bits;
1816     struct kvm_one_reg reg = {
1817         .id = KVM_REG_PPC_CLEAR_TSR,
1818         .addr = (uintptr_t) &bits,
1819     };
1820
1821     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1822 }
1823
1824 int kvmppc_set_tcr(PowerPCCPU *cpu)
1825 {
1826     CPUState *cs = CPU(cpu);
1827     CPUPPCState *env = &cpu->env;
1828     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1829
1830     struct kvm_one_reg reg = {
1831         .id = KVM_REG_PPC_TCR,
1832         .addr = (uintptr_t) &tcr,
1833     };
1834
1835     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1836 }
1837
1838 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1839 {
1840     CPUState *cs = CPU(cpu);
1841     int ret;
1842
1843     if (!kvm_enabled()) {
1844         return -1;
1845     }
1846
1847     if (!cap_ppc_watchdog) {
1848         printf("warning: KVM does not support watchdog");
1849         return -1;
1850     }
1851
1852     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1853     if (ret < 0) {
1854         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1855                 __func__, strerror(-ret));
1856         return ret;
1857     }
1858
1859     return ret;
1860 }
1861
/*
 * Look through /proc/cpuinfo for the first line that begins with @field
 * and copy that line into @value via pstrcpy(), bounded by @len.
 *
 * Returns 0 when such a line was found, -1 otherwise (including when the
 * file cannot be opened).
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char buf[512];
    int prefix_len = strlen(field);
    int status = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    while (fgets(buf, sizeof(buf), cpuinfo) != NULL) {
        if (strncmp(buf, field, prefix_len) == 0) {
            pstrcpy(value, len, buf);
            status = 0;
            break;
        }
        /* An empty string from fgets() also ends the scan */
        if (buf[0] == '\0') {
            break;
        }
    }

    fclose(cpuinfo);

    return status;
}
1889
1890 uint32_t kvmppc_get_tbfreq(void)
1891 {
1892     char line[512];
1893     char *ns;
1894     uint32_t retval = NANOSECONDS_PER_SECOND;
1895
1896     if (read_cpuinfo("timebase", line, sizeof(line))) {
1897         return retval;
1898     }
1899
1900     if (!(ns = strchr(line, ':'))) {
1901         return retval;
1902     }
1903
1904     ns++;
1905
1906     return atoi(ns);
1907 }
1908
1909 bool kvmppc_get_host_serial(char **value)
1910 {
1911     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1912                                NULL);
1913 }
1914
1915 bool kvmppc_get_host_model(char **value)
1916 {
1917     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1918 }
1919
1920 /* Try to find a device tree node for a CPU with clock-frequency property */
1921 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1922 {
1923     struct dirent *dirp;
1924     DIR *dp;
1925
1926     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1927         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1928         return -1;
1929     }
1930
1931     buf[0] = '\0';
1932     while ((dirp = readdir(dp)) != NULL) {
1933         FILE *f;
1934         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1935                  dirp->d_name);
1936         f = fopen(buf, "r");
1937         if (f) {
1938             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1939             fclose(f);
1940             break;
1941         }
1942         buf[0] = '\0';
1943     }
1944     closedir(dp);
1945     if (buf[0] == '\0') {
1946         printf("Unknown host!\n");
1947         return -1;
1948     }
1949
1950     return 0;
1951 }
1952
/*
 * Read a big-endian integer from @filename.
 *
 * A 4-byte file is decoded as 32 bits and an 8-byte read as 64 bits.
 * Returns (uint64_t)-1 when the file cannot be opened and 0 when the
 * number of bytes read is neither 4 nor 8.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1979
1980 /* Read a CPU node property from the host device tree that's a single
1981  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1982  * (can't find or open the property, or doesn't understand the
1983  * format) */
1984 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1985 {
1986     char buf[PATH_MAX], *tmp;
1987     uint64_t val;
1988
1989     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1990         return -1;
1991     }
1992
1993     tmp = g_strdup_printf("%s/%s", buf, propname);
1994     val = kvmppc_read_int_dt(tmp);
1995     g_free(tmp);
1996
1997     return val;
1998 }
1999
/*
 * Return the host CPU node's "clock-frequency" property as read by
 * kvmppc_read_int_cpu_dt() (error values are passed through unchanged).
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
2004
2005 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2006  {
2007      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2008      CPUState *cs = CPU(cpu);
2009
2010     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2011         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2012         return 0;
2013     }
2014
2015     return 1;
2016 }
2017
2018 int kvmppc_get_hasidle(CPUPPCState *env)
2019 {
2020     struct kvm_ppc_pvinfo pvinfo;
2021
2022     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2023         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2024         return 1;
2025     }
2026
2027     return 0;
2028 }
2029
2030 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2031 {
2032     uint32_t *hc = (uint32_t*)buf;
2033     struct kvm_ppc_pvinfo pvinfo;
2034
2035     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2036         memcpy(buf, pvinfo.hcall, buf_len);
2037         return 0;
2038     }
2039
2040     /*
2041      * Fallback to always fail hypercalls regardless of endianness:
2042      *
2043      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2044      *     li r3, -1
2045      *     b .+8       (becomes nop in wrong endian)
2046      *     bswap32(li r3, -1)
2047      */
2048
2049     hc[0] = cpu_to_be32(0x08000048);
2050     hc[1] = cpu_to_be32(0x3860ffff);
2051     hc[2] = cpu_to_be32(0x48000008);
2052     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2053
2054     return 1;
2055 }
2056
2057 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2058 {
2059     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2060 }
2061
2062 void kvmppc_enable_logical_ci_hcalls(void)
2063 {
2064     /*
2065      * FIXME: it would be nice if we could detect the cases where
2066      * we're using a device which requires the in kernel
2067      * implementation of these hcalls, but the kernel lacks them and
2068      * produce a warning.
2069      */
2070     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2071     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2072 }
2073
2074 void kvmppc_enable_set_mode_hcall(void)
2075 {
2076     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2077 }
2078
2079 void kvmppc_enable_clear_ref_mod_hcalls(void)
2080 {
2081     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2082     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2083 }
2084
2085 void kvmppc_set_papr(PowerPCCPU *cpu)
2086 {
2087     CPUState *cs = CPU(cpu);
2088     int ret;
2089
2090     if (!kvm_enabled()) {
2091         return;
2092     }
2093
2094     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2095     if (ret) {
2096         error_report("This vCPU type or KVM version does not support PAPR");
2097         exit(1);
2098     }
2099
2100     /* Update the capability flag so we sync the right information
2101      * with kvm */
2102     cap_papr = 1;
2103 }
2104
2105 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2106 {
2107     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2108 }
2109
2110 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2111 {
2112     CPUState *cs = CPU(cpu);
2113     int ret;
2114
2115     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2116     if (ret && mpic_proxy) {
2117         error_report("This KVM version does not support EPR");
2118         exit(1);
2119     }
2120 }
2121
2122 int kvmppc_smt_threads(void)
2123 {
2124     return cap_ppc_smt ? cap_ppc_smt : 1;
2125 }
2126
2127 int kvmppc_set_smt_threads(int smt)
2128 {
2129     int ret;
2130
2131     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2132     if (!ret) {
2133         cap_ppc_smt = smt;
2134     }
2135     return ret;
2136 }
2137
2138 void kvmppc_hint_smt_possible(Error **errp)
2139 {
2140     int i;
2141     GString *g;
2142     char *s;
2143
2144     assert(kvm_enabled());
2145     if (cap_ppc_smt_possible) {
2146         g = g_string_new("Available VSMT modes:");
2147         for (i = 63; i >= 0; i--) {
2148             if ((1UL << i) & cap_ppc_smt_possible) {
2149                 g_string_append_printf(g, " %lu", (1UL << i));
2150             }
2151         }
2152         s = g_string_free(g, false);
2153         error_append_hint(errp, "%s.\n", s);
2154         g_free(s);
2155     } else {
2156         error_append_hint(errp,
2157                           "This KVM seems to be too old to support VSMT.\n");
2158     }
2159 }
2160
2161
#ifdef TARGET_PPC64
/*
 * Return the smaller of @current_size and
 * 1 << (best_page_shift + @hash_shift - 7), where best_page_shift is the
 * largest page shift reported in the SMMU info whose page size does not
 * exceed the RAM page size from qemu_getrampagesize().
 */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize;
    long best_page_shift = 0;
    uint64_t limit;
    int idx;

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = qemu_getrampagesize();

    for (idx = 0; idx < KVM_PPC_PAGE_SIZES_MAX_SZ; idx++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[idx];

        /* Skip entries that carry no page shift */
        if (sps->page_shift
            && (sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    limit = 1ULL << (best_page_shift + hash_shift - 7);

    return (current_size < limit) ? current_size : limit;
}
#endif
2192
2193 bool kvmppc_spapr_use_multitce(void)
2194 {
2195     return cap_spapr_multitce;
2196 }
2197
2198 int kvmppc_spapr_enable_inkernel_multitce(void)
2199 {
2200     int ret;
2201
2202     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2203                             H_PUT_TCE_INDIRECT, 1);
2204     if (!ret) {
2205         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2206                                 H_STUFF_TCE, 1);
2207     }
2208
2209     return ret;
2210 }
2211
2212 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2213                               uint64_t bus_offset, uint32_t nb_table,
2214                               int *pfd, bool need_vfio)
2215 {
2216     long len;
2217     int fd;
2218     void *table;
2219
2220     /* Must set fd to -1 so we don't try to munmap when called for
2221      * destroying the table, which the upper layers -will- do
2222      */
2223     *pfd = -1;
2224     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2225         return NULL;
2226     }
2227
2228     if (cap_spapr_tce_64) {
2229         struct kvm_create_spapr_tce_64 args = {
2230             .liobn = liobn,
2231             .page_shift = page_shift,
2232             .offset = bus_offset >> page_shift,
2233             .size = nb_table,
2234             .flags = 0
2235         };
2236         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2237         if (fd < 0) {
2238             fprintf(stderr,
2239                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2240                     liobn);
2241             return NULL;
2242         }
2243     } else if (cap_spapr_tce) {
2244         uint64_t window_size = (uint64_t) nb_table << page_shift;
2245         struct kvm_create_spapr_tce args = {
2246             .liobn = liobn,
2247             .window_size = window_size,
2248         };
2249         if ((window_size != args.window_size) || bus_offset) {
2250             return NULL;
2251         }
2252         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2253         if (fd < 0) {
2254             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2255                     liobn);
2256             return NULL;
2257         }
2258     } else {
2259         return NULL;
2260     }
2261
2262     len = nb_table * sizeof(uint64_t);
2263     /* FIXME: round this up to page size */
2264
2265     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2266     if (table == MAP_FAILED) {
2267         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2268                 liobn);
2269         close(fd);
2270         return NULL;
2271     }
2272
2273     *pfd = fd;
2274     return table;
2275 }
2276
/*
 * Unmap a TCE table of @nb_table 64-bit entries at @table and close its
 * @fd.
 *
 * Returns -1 without doing anything when @fd is negative; otherwise
 * returns 0, reporting (but not propagating) any munmap()/close()
 * failure on stderr.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    size_t len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the line so later diagnostics don't run into it */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2295
2296 int kvmppc_reset_htab(int shift_hint)
2297 {
2298     uint32_t shift = shift_hint;
2299
2300     if (!kvm_enabled()) {
2301         /* Full emulation, tell caller to allocate htab itself */
2302         return 0;
2303     }
2304     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2305         int ret;
2306         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2307         if (ret == -ENOTTY) {
2308             /* At least some versions of PR KVM advertise the
2309              * capability, but don't implement the ioctl().  Oops.
2310              * Return 0 so that we allocate the htab in qemu, as is
2311              * correct for PR. */
2312             return 0;
2313         } else if (ret < 0) {
2314             return ret;
2315         }
2316         return shift;
2317     }
2318
2319     /* We have a kernel that predates the htab reset calls.  For PR
2320      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2321      * this era, it has allocated a 16MB fixed size hash table already. */
2322     if (kvmppc_is_pr(kvm_state)) {
2323         /* PR - tell caller to allocate htab */
2324         return 0;
2325     } else {
2326         /* HV - assume 16MB kernel allocated htab */
2327         return 24;
2328     }
2329 }
2330
/* Execute the "mfpvr" instruction and return the value it produces. */
static inline uint32_t mfpvr(void)
{
    uint32_t value;

    __asm__ ("mfpvr %0" : "=r"(value));

    return value;
}
2339
/* Set (@on true) or clear (@on false) the bits of @flags in *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2348
2349 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2350 {
2351     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2352     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2353     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2354
2355     /* Now fix up the class with information we can query from the host */
2356     pcc->pvr = mfpvr();
2357
2358     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2359                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2360     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2361                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2362     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2363                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2364
2365     if (dcache_size != -1) {
2366         pcc->l1_dcache_size = dcache_size;
2367     }
2368
2369     if (icache_size != -1) {
2370         pcc->l1_icache_size = icache_size;
2371     }
2372
2373 #if defined(TARGET_PPC64)
2374     pcc->radix_page_info = kvm_get_radix_page_info();
2375
2376     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2377         /*
2378          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2379          * compliant.  More importantly, advertising ISA 3.00
2380          * architected mode may prevent guests from activating
2381          * necessary DD1 workarounds.
2382          */
2383         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2384                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2385     }
2386 #endif /* defined(TARGET_PPC64) */
2387 }
2388
2389 bool kvmppc_has_cap_epr(void)
2390 {
2391     return cap_epr;
2392 }
2393
2394 bool kvmppc_has_cap_fixup_hcalls(void)
2395 {
2396     return cap_fixup_hcalls;
2397 }
2398
2399 bool kvmppc_has_cap_htm(void)
2400 {
2401     return cap_htm;
2402 }
2403
2404 bool kvmppc_has_cap_mmu_radix(void)
2405 {
2406     return cap_mmu_radix;
2407 }
2408
2409 bool kvmppc_has_cap_mmu_hash_v3(void)
2410 {
2411     return cap_mmu_hash_v3;
2412 }
2413
/*
 * On TARGET_PPC64 builds, report whether the host PVR (masked with
 * CPU_POWERPC_POWER_SERVER_MASK) matches one of the POWER8E, POWER8NVL
 * or POWER8 base values.  Always false on other builds.
 */
static bool kvmppc_power8_host(void)
{
#ifdef TARGET_PPC64
    uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();

    if (base_pvr == CPU_POWERPC_POWER8E_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8NVL_BASE) {
        return true;
    }
    if (base_pvr == CPU_POWERPC_POWER8_BASE) {
        return true;
    }
#endif /* TARGET_PPC64 */
    return false;
}
2427
2428 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2429 {
2430     bool l1d_thread_priv_req = !kvmppc_power8_host();
2431
2432     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2433         return 2;
2434     } else if ((!l1d_thread_priv_req ||
2435                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2436                (c.character & c.character_mask
2437                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2438         return 1;
2439     }
2440
2441     return 0;
2442 }
2443
2444 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2445 {
2446     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2447         return 2;
2448     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2449         return 1;
2450     }
2451
2452     return 0;
2453 }
2454
2455 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2456 {
2457     if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2458         return  SPAPR_CAP_FIXED_CCD;
2459     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2460         return SPAPR_CAP_FIXED_IBS;
2461     }
2462
2463     return 0;
2464 }
2465
2466 static void kvmppc_get_cpu_characteristics(KVMState *s)
2467 {
2468     struct kvm_ppc_cpu_char c;
2469     int ret;
2470
2471     /* Assume broken */
2472     cap_ppc_safe_cache = 0;
2473     cap_ppc_safe_bounds_check = 0;
2474     cap_ppc_safe_indirect_branch = 0;
2475
2476     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2477     if (!ret) {
2478         return;
2479     }
2480     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2481     if (ret < 0) {
2482         return;
2483     }
2484
2485     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2486     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2487     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2488 }
2489
2490 int kvmppc_get_cap_safe_cache(void)
2491 {
2492     return cap_ppc_safe_cache;
2493 }
2494
2495 int kvmppc_get_cap_safe_bounds_check(void)
2496 {
2497     return cap_ppc_safe_bounds_check;
2498 }
2499
2500 int kvmppc_get_cap_safe_indirect_branch(void)
2501 {
2502     return cap_ppc_safe_indirect_branch;
2503 }
2504
2505 bool kvmppc_has_cap_spapr_vfio(void)
2506 {
2507     return cap_spapr_vfio;
2508 }
2509
2510 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2511 {
2512     uint32_t host_pvr = mfpvr();
2513     PowerPCCPUClass *pvr_pcc;
2514
2515     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2516     if (pvr_pcc == NULL) {
2517         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2518     }
2519
2520     return pvr_pcc;
2521 }
2522
2523 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2524 {
2525     TypeInfo type_info = {
2526         .name = TYPE_HOST_POWERPC_CPU,
2527         .class_init = kvmppc_host_cpu_class_init,
2528     };
2529     MachineClass *mc = MACHINE_GET_CLASS(ms);
2530     PowerPCCPUClass *pvr_pcc;
2531     ObjectClass *oc;
2532     DeviceClass *dc;
2533     int i;
2534
2535     pvr_pcc = kvm_ppc_get_host_cpu_class();
2536     if (pvr_pcc == NULL) {
2537         return -1;
2538     }
2539     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2540     type_register(&type_info);
2541     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2542         /* override TCG default cpu type with 'host' cpu model */
2543         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2544     }
2545
2546     oc = object_class_by_name(type_info.name);
2547     g_assert(oc);
2548
2549     /*
2550      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2551      * we want "POWER8" to be a "family" alias that points to the current
2552      * host CPU type, too)
2553      */
2554     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2555     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2556         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2557             char *suffix;
2558
2559             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2560             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2561             if (suffix) {
2562                 *suffix = 0;
2563             }
2564             break;
2565         }
2566     }
2567
2568     return 0;
2569 }
2570
2571 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2572 {
2573     struct kvm_rtas_token_args args = {
2574         .token = token,
2575     };
2576
2577     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2578         return -ENOENT;
2579     }
2580
2581     strncpy(args.name, function, sizeof(args.name));
2582
2583     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2584 }
2585
2586 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2587 {
2588     struct kvm_get_htab_fd s = {
2589         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2590         .start_index = index,
2591     };
2592     int ret;
2593
2594     if (!cap_htab_fd) {
2595         error_setg(errp, "KVM version doesn't support %s the HPT",
2596                    write ? "writing" : "reading");
2597         return -ENOTSUP;
2598     }
2599
2600     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2601     if (ret < 0) {
2602         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2603                    write ? "writing" : "reading", write ? "to" : "from",
2604                    strerror(errno));
2605         return -errno;
2606     }
2607
2608     return ret;
2609 }
2610
2611 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2612 {
2613     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2614     uint8_t buf[bufsize];
2615     ssize_t rc;
2616
2617     do {
2618         rc = read(fd, buf, bufsize);
2619         if (rc < 0) {
2620             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2621                     strerror(errno));
2622             return rc;
2623         } else if (rc) {
2624             uint8_t *buffer = buf;
2625             ssize_t n = rc;
2626             while (n) {
2627                 struct kvm_get_htab_header *head =
2628                     (struct kvm_get_htab_header *) buffer;
2629                 size_t chunksize = sizeof(*head) +
2630                      HASH_PTE_SIZE_64 * head->n_valid;
2631
2632                 qemu_put_be32(f, head->index);
2633                 qemu_put_be16(f, head->n_valid);
2634                 qemu_put_be16(f, head->n_invalid);
2635                 qemu_put_buffer(f, (void *)(head + 1),
2636                                 HASH_PTE_SIZE_64 * head->n_valid);
2637
2638                 buffer += chunksize;
2639                 n -= chunksize;
2640             }
2641         }
2642     } while ((rc != 0)
2643              && ((max_ns < 0)
2644                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2645
2646     return (rc == 0) ? 1 : 0;
2647 }
2648
2649 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2650                            uint16_t n_valid, uint16_t n_invalid)
2651 {
2652     struct kvm_get_htab_header *buf;
2653     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2654     ssize_t rc;
2655
2656     buf = alloca(chunksize);
2657     buf->index = index;
2658     buf->n_valid = n_valid;
2659     buf->n_invalid = n_invalid;
2660
2661     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2662
2663     rc = write(fd, buf, chunksize);
2664     if (rc < 0) {
2665         fprintf(stderr, "Error writing KVM hash table: %s\n",
2666                 strerror(errno));
2667         return rc;
2668     }
2669     if (rc != chunksize) {
2670         /* We should never get a short write on a single chunk */
2671         fprintf(stderr, "Short write, restoring KVM hash table\n");
2672         return -1;
2673     }
2674     return 0;
2675 }
2676
2677 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2678 {
2679     return true;
2680 }
2681
2682 void kvm_arch_init_irq_routing(KVMState *s)
2683 {
2684 }
2685
2686 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2687 {
2688     int fd, rc;
2689     int i;
2690
2691     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2692
2693     i = 0;
2694     while (i < n) {
2695         struct kvm_get_htab_header *hdr;
2696         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2697         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2698
2699         rc = read(fd, buf, sizeof(buf));
2700         if (rc < 0) {
2701             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2702         }
2703
2704         hdr = (struct kvm_get_htab_header *)buf;
2705         while ((i < n) && ((char *)hdr < (buf + rc))) {
2706             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2707
2708             if (hdr->index != (ptex + i)) {
2709                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2710                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2711             }
2712
2713             if (n - i < valid) {
2714                 valid = n - i;
2715             }
2716             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2717             i += valid;
2718
2719             if ((n - i) < invalid) {
2720                 invalid = n - i;
2721             }
2722             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2723             i += invalid;
2724
2725             hdr = (struct kvm_get_htab_header *)
2726                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2727         }
2728     }
2729
2730     close(fd);
2731 }
2732
2733 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2734 {
2735     int fd, rc;
2736     struct {
2737         struct kvm_get_htab_header hdr;
2738         uint64_t pte0;
2739         uint64_t pte1;
2740     } buf;
2741
2742     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2743
2744     buf.hdr.n_valid = 1;
2745     buf.hdr.n_invalid = 0;
2746     buf.hdr.index = ptex;
2747     buf.pte0 = cpu_to_be64(pte0);
2748     buf.pte1 = cpu_to_be64(pte1);
2749
2750     rc = write(fd, &buf, sizeof(buf));
2751     if (rc != sizeof(buf)) {
2752         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2753     }
2754     close(fd);
2755 }
2756
2757 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2758                              uint64_t address, uint32_t data, PCIDevice *dev)
2759 {
2760     return 0;
2761 }
2762
2763 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2764                                 int vector, PCIDevice *dev)
2765 {
2766     return 0;
2767 }
2768
/* Nothing to do: @virq is ignored and 0 is returned. */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
2773
/* Return the low 16 bits of @data. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;

    return data & low16_mask;
}
2778
2779 int kvmppc_enable_hwrng(void)
2780 {
2781     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2782         return -1;
2783     }
2784
2785     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2786 }
2787
2788 void kvmppc_check_papr_resize_hpt(Error **errp)
2789 {
2790     if (!kvm_enabled()) {
2791         return; /* No KVM, we're good */
2792     }
2793
2794     if (cap_resize_hpt) {
2795         return; /* Kernel has explicit support, we're good */
2796     }
2797
2798     /* Otherwise fallback on looking for PR KVM */
2799     if (kvmppc_is_pr(kvm_state)) {
2800         return;
2801     }
2802
2803     error_setg(errp,
2804                "Hash page table resizing not available with this KVM version");
2805 }
2806
2807 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2808 {
2809     CPUState *cs = CPU(cpu);
2810     struct kvm_ppc_resize_hpt rhpt = {
2811         .flags = flags,
2812         .shift = shift,
2813     };
2814
2815     if (!cap_resize_hpt) {
2816         return -ENOSYS;
2817     }
2818
2819     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2820 }
2821
2822 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2823 {
2824     CPUState *cs = CPU(cpu);
2825     struct kvm_ppc_resize_hpt rhpt = {
2826         .flags = flags,
2827         .shift = shift,
2828     };
2829
2830     if (!cap_resize_hpt) {
2831         return -ENOSYS;
2832     }
2833
2834     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2835 }
2836
2837 /*
2838  * This is a helper function to detect a post migration scenario
2839  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2840  * the guest kernel can't handle a PVR value other than the actual host
2841  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2842  *
2843  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2844  * (so, we're HV), return true. The workaround itself is done in
2845  * cpu_post_load.
2846  *
2847  * The order here is important: we'll only check for KVM PR as a
2848  * fallback if the guest kernel can't handle the situation itself.
2849  * We need to avoid as much as possible querying the running KVM type
2850  * in QEMU level.
2851  */
2852 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2853 {
2854     CPUState *cs = CPU(cpu);
2855
2856     if (!kvm_enabled()) {
2857         return false;
2858     }
2859
2860     if (cap_ppc_pvr_compat) {
2861         return false;
2862     }
2863
2864     return !kvmppc_is_pr(cs->kvm_state);
2865 }