target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qemu/error-report.h"
  26 #include "cpu.h"
  27 #include "cpu-models.h"
  28 #include "qemu/timer.h"
  29 #include "sysemu/sysemu.h"
  30 #include "sysemu/hw_accel.h"
  31 #include "kvm_ppc.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/spapr_cpu_core.h"
  40 #include "hw/ppc/ppc.h"
  41 #include "sysemu/watchdog.h"
  42 #include "trace.h"
  43 #include "exec/gdbstub.h"
  44 #include "exec/memattrs.h"
  45 #include "exec/ram_addr.h"
  46 #include "sysemu/hostmem.h"
  47 #include "qemu/cutils.h"
  48 #include "qemu/mmap-alloc.h"
  49 #if defined(TARGET_PPC64)
  50 #include "hw/ppc/spapr_cpu_core.h"
  51 #endif
  52 #include "elf.h"
  53 #include "sysemu/kvm_int.h"
  54
  55 //#define DEBUG_KVM
  56
  57 #ifdef DEBUG_KVM
  58 #define DPRINTF(fmt, ...) \
  59     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  60 #else
  61 #define DPRINTF(fmt, ...) \
  62     do { } while (0)
  63 #endif
  64
  65 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  66
  67 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  68     KVM_CAP_LAST_INFO
  69 };
  70
  71 static int cap_interrupt_unset = false;
  72 static int cap_interrupt_level = false;
  73 static int cap_segstate;
  74 static int cap_booke_sregs;
  75 static int cap_ppc_smt;
  76 static int cap_ppc_rma;
  77 static int cap_spapr_tce;
  78 static int cap_spapr_tce_64;
  79 static int cap_spapr_multitce;
  80 static int cap_spapr_vfio;
  81 static int cap_hior;
  82 static int cap_one_reg;
  83 static int cap_epr;
  84 static int cap_ppc_watchdog;
  85 static int cap_papr;
  86 static int cap_htab_fd;
  87 static int cap_fixup_hcalls;
  88 static int cap_htm;             /* Hardware transactional memory support */
  89 static int cap_mmu_radix;
  90 static int cap_mmu_hash_v3;
  91
  92 static uint32_t debug_inst_opcode;
  93
  94 /* XXX We have a race condition where we actually have a level triggered
  95  *     interrupt, but the infrastructure can't expose that yet, so the guest
  96  *     takes but ignores it, goes to sleep and never gets notified that there's
  97  *     still an interrupt pending.
  98  *
  99  *     As a quick workaround, let's just wake up again 20 ms after we injected
 100  *     an interrupt. That way we can assure that we're always reinjecting
 101  *     interrupts in case the guest swallowed them.
 102  */
 103 static QEMUTimer *idle_timer;
 104
 105 static void kvm_kick_cpu(void *opaque)
 106 {
 107     PowerPCCPU *cpu = opaque;
 108
 109     qemu_cpu_kick(CPU(cpu));
 110 }
 111
 112 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 113  * should only be used for fallback tests - generally we should use
 114  * explicit capabilities for the features we want, rather than
 115  * assuming what is/isn't available depending on the KVM variant. */
 116 static bool kvmppc_is_pr(KVMState *ks)
 117 {
 118     /* Assume KVM-PR if the GET_PVINFO capability is available */
 119     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 120 }
 121
 122 static int kvm_ppc_register_host_cpu_type(void);
 123
 124 int kvm_arch_init(MachineState *ms, KVMState *s)
 125 {
 126     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 127     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 128     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 129     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 130     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 131     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 132     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 133     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 134     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 135     cap_spapr_vfio = false;
 136     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 137     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 138     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 139     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 140     /* Note: we don't set cap_papr here, because this capability is
 141      * only activated after this by kvmppc_set_papr() */
 142     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 143     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 144     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 145     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 146     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 147
 148     if (!cap_interrupt_level) {
 149         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 150                         "VM to stall at times!\n");
 151     }
 152
 153     kvm_ppc_register_host_cpu_type();
 154
 155     return 0;
 156 }
 157
 158 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 159 {
 160     return 0;
 161 }
 162
 163 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 164 {
 165     CPUPPCState *cenv = &cpu->env;
 166     CPUState *cs = CPU(cpu);
 167     struct kvm_sregs sregs;
 168     int ret;
 169
 170     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 171         /* What we're really trying to say is "if we're on BookE, we use
 172            the native PVR for now". This is the only sane way to check
 173            it though, so we potentially confuse users that they can run
 174            BookE guests on BookS. Let's hope nobody dares enough :) */
 175         return 0;
 176     } else {
 177         if (!cap_segstate) {
 178             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 179             return -ENOSYS;
 180         }
 181     }
 182
 183     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 184     if (ret) {
 185         return ret;
 186     }
 187
 188     sregs.pvr = cenv->spr[SPR_PVR];
 189     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 190 }
 191
 192 /* Set up a shared TLB array with KVM */
 193 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 194 {
 195     CPUPPCState *env = &cpu->env;
 196     CPUState *cs = CPU(cpu);
 197     struct kvm_book3e_206_tlb_params params = {};
 198     struct kvm_config_tlb cfg = {};
 199     unsigned int entries = 0;
 200     int ret, i;
 201
 202     if (!kvm_enabled() ||
 203         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 204         return 0;
 205     }
 206
 207     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 208
 209     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 210         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 211         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 212         entries += params.tlb_sizes[i];
 213     }
 214
 215     assert(entries == env->nb_tlb);
 216     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 217
 218     env->tlb_dirty = true;
 219
 220     cfg.array = (uintptr_t)env->tlb.tlbm;
 221     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 222     cfg.params = (uintptr_t)&params;
 223     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 224
 225     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 226     if (ret < 0) {
 227         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 228                 __func__, strerror(-ret));
 229         return ret;
 230     }
 231
 232     env->kvm_sw_tlb = true;
 233     return 0;
 234 }
 235
 236
 237 #if defined(TARGET_PPC64)
 238 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 239                                        struct kvm_ppc_smmu_info *info)
 240 {
 241     CPUPPCState *env = &cpu->env;
 242     CPUState *cs = CPU(cpu);
 243
 244     memset(info, 0, sizeof(*info));
 245
 246     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 247      * need to "guess" what the supported page sizes are.
 248      *
 249      * For that to work we make a few assumptions:
 250      *
 251      * - Check whether we are running "PR" KVM which only supports 4K
 252      *   and 16M pages, but supports them regardless of the backing
 253      *   store characteritics. We also don't support 1T segments.
 254      *
 255      *   This is safe as if HV KVM ever supports that capability or PR
 256      *   KVM grows supports for more page/segment sizes, those versions
 257      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 258      *   will not hit this fallback
 259      *
 260      * - Else we are running HV KVM. This means we only support page
 261      *   sizes that fit in the backing store. Additionally we only
 262      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 263      *   P7 encodings for the SLB and hash table. Here too, we assume
 264      *   support for any newer processor will mean a kernel that
 265      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 266      *   this fallback.
 267      */
 268     if (kvmppc_is_pr(cs->kvm_state)) {
 269         /* No flags */
 270         info->flags = 0;
 271         info->slb_size = 64;
 272
 273         /* Standard 4k base page size segment */
 274         info->sps[0].page_shift = 12;
 275         info->sps[0].slb_enc = 0;
 276         info->sps[0].enc[0].page_shift = 12;
 277         info->sps[0].enc[0].pte_enc = 0;
 278
 279         /* Standard 16M large page size segment */
 280         info->sps[1].page_shift = 24;
 281         info->sps[1].slb_enc = SLB_VSID_L;
 282         info->sps[1].enc[0].page_shift = 24;
 283         info->sps[1].enc[0].pte_enc = 0;
 284     } else {
 285         int i = 0;
 286
 287         /* HV KVM has backing store size restrictions */
 288         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 289
 290         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 291             info->flags |= KVM_PPC_1T_SEGMENTS;
 292         }
 293
 294         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 295            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 296             info->slb_size = 32;
 297         } else {
 298             info->slb_size = 64;
 299         }
 300
 301         /* Standard 4k base page size segment */
 302         info->sps[i].page_shift = 12;
 303         info->sps[i].slb_enc = 0;
 304         info->sps[i].enc[0].page_shift = 12;
 305         info->sps[i].enc[0].pte_enc = 0;
 306         i++;
 307
 308         /* 64K on MMU 2.06 and later */
 309         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 310             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 311             info->sps[i].page_shift = 16;
 312             info->sps[i].slb_enc = 0x110;
 313             info->sps[i].enc[0].page_shift = 16;
 314             info->sps[i].enc[0].pte_enc = 1;
 315             i++;
 316         }
 317
 318         /* Standard 16M large page size segment */
 319         info->sps[i].page_shift = 24;
 320         info->sps[i].slb_enc = SLB_VSID_L;
 321         info->sps[i].enc[0].page_shift = 24;
 322         info->sps[i].enc[0].pte_enc = 0;
 323     }
 324 }
 325
 326 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 327 {
 328     CPUState *cs = CPU(cpu);
 329     int ret;
 330
 331     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 332         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 333         if (ret == 0) {
 334             return;
 335         }
 336     }
 337
 338     kvm_get_fallback_smmu_info(cpu, info);
 339 }
 340
 341 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 342 {
 343     KVMState *s = KVM_STATE(current_machine->accelerator);
 344     struct ppc_radix_page_info *radix_page_info;
 345     struct kvm_ppc_rmmu_info rmmu_info;
 346     int i;
 347
 348     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 349         return NULL;
 350     }
 351     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 352         return NULL;
 353     }
 354     radix_page_info = g_malloc0(sizeof(*radix_page_info));
 355     radix_page_info->count = 0;
 356     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 357         if (rmmu_info.ap_encodings[i]) {
 358             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 359             radix_page_info->count++;
 360         }
 361     }
 362     return radix_page_info;
 363 }
 364
 365 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 366                                      bool radix, bool gtse,
 367                                      uint64_t proc_tbl)
 368 {
 369     CPUState *cs = CPU(cpu);
 370     int ret;
 371     uint64_t flags = 0;
 372     struct kvm_ppc_mmuv3_cfg cfg = {
 373         .process_table = proc_tbl,
 374     };
 375
 376     if (radix) {
 377         flags |= KVM_PPC_MMUV3_RADIX;
 378     }
 379     if (gtse) {
 380         flags |= KVM_PPC_MMUV3_GTSE;
 381     }
 382     cfg.flags = flags;
 383     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 384     switch (ret) {
 385     case 0:
 386         return H_SUCCESS;
 387     case -EINVAL:
 388         return H_PARAMETER;
 389     case -ENODEV:
 390         return H_NOT_AVAILABLE;
 391     default:
 392         return H_HARDWARE;
 393     }
 394 }
 395
 396 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 397 {
 398     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 399         return true;
 400     }
 401
 402     return (1ul << shift) <= rampgsize;
 403 }
 404
 405 static long max_cpu_page_size;
 406
 407 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 408 {
 409     static struct kvm_ppc_smmu_info smmu_info;
 410     static bool has_smmu_info;
 411     CPUPPCState *env = &cpu->env;
 412     int iq, ik, jq, jk;
 413     bool has_64k_pages = false;
 414
 415     /* We only handle page sizes for 64-bit server guests for now */
 416     if (!(env->mmu_model & POWERPC_MMU_64)) {
 417         return;
 418     }
 419
 420     /* Collect MMU info from kernel if not already */
 421     if (!has_smmu_info) {
 422         kvm_get_smmu_info(cpu, &smmu_info);
 423         has_smmu_info = true;
 424     }
 425
 426     if (!max_cpu_page_size) {
 427         max_cpu_page_size = qemu_getrampagesize();
 428     }
 429
 430     /* Convert to QEMU form */
 431     memset(&env->sps, 0, sizeof(env->sps));
 432
 433     /* If we have HV KVM, we need to forbid CI large pages if our
 434      * host page size is smaller than 64K.
 435      */
 436     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 437         env->ci_large_pages = getpagesize() >= 0x10000;
 438     }
 439
 440     /*
 441      * XXX This loop should be an entry wide AND of the capabilities that
 442      *     the selected CPU has with the capabilities that KVM supports.
 443      */
 444     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 445         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 446         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 447
 448         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 449                                  ksps->page_shift)) {
 450             continue;
 451         }
 452         qsps->page_shift = ksps->page_shift;
 453         qsps->slb_enc = ksps->slb_enc;
 454         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 455             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 456                                      ksps->enc[jk].page_shift)) {
 457                 continue;
 458             }
 459             if (ksps->enc[jk].page_shift == 16) {
 460                 has_64k_pages = true;
 461             }
 462             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 463             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 464             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 465                 break;
 466             }
 467         }
 468         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 469             break;
 470         }
 471     }
 472     env->slb_nr = smmu_info.slb_size;
 473     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 474         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 475     }
 476     if (!has_64k_pages) {
 477         env->mmu_model &= ~POWERPC_MMU_64K;
 478     }
 479 }
 480
 481 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 482 {
 483     Object *mem_obj = object_resolve_path(obj_path, NULL);
 484     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
 485     long pagesize;
 486
 487     if (mempath) {
 488         pagesize = qemu_mempath_getpagesize(mempath);
 489         g_free(mempath);
 490     } else {
 491         pagesize = getpagesize();
 492     }
 493
 494     return pagesize >= max_cpu_page_size;
 495 }
 496
 497 #else /* defined (TARGET_PPC64) */
 498
 499 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 500 {
 501 }
 502
 503 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 504 {
 505     return true;
 506 }
 507
 508 #endif /* !defined (TARGET_PPC64) */
 509
 510 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 511 {
 512     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 513 }
 514
 515 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 516  * book3s supports only 1 watchpoint, so array size
 517  * of 4 is sufficient for now.
 518  */
 519 #define MAX_HW_BKPTS 4
 520
 521 static struct HWBreakpoint {
 522     target_ulong addr;
 523     int type;
 524 } hw_debug_points[MAX_HW_BKPTS];
 525
 526 static CPUWatchpoint hw_watchpoint;
 527
 528 /* Default there is no breakpoint and watchpoint supported */
 529 static int max_hw_breakpoint;
 530 static int max_hw_watchpoint;
 531 static int nb_hw_breakpoint;
 532 static int nb_hw_watchpoint;
 533
 534 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 535 {
 536     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 537         max_hw_breakpoint = 2;
 538         max_hw_watchpoint = 2;
 539     }
 540
 541     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 542         fprintf(stderr, "Error initializing h/w breakpoints\n");
 543         return;
 544     }
 545 }
 546
 547 int kvm_arch_init_vcpu(CPUState *cs)
 548 {
 549     PowerPCCPU *cpu = POWERPC_CPU(cs);
 550     CPUPPCState *cenv = &cpu->env;
 551     int ret;
 552
 553     /* Gather server mmu info from KVM and update the CPU state */
 554     kvm_fixup_page_sizes(cpu);
 555
 556     /* Synchronize sregs with kvm */
 557     ret = kvm_arch_sync_sregs(cpu);
 558     if (ret) {
 559         if (ret == -EINVAL) {
 560             error_report("Register sync failed... If you're using kvm-hv.ko,"
 561                          " only \"-cpu host\" is possible");
 562         }
 563         return ret;
 564     }
 565
 566     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 567
 568     switch (cenv->mmu_model) {
 569     case POWERPC_MMU_BOOKE206:
 570         /* This target supports access to KVM's guest TLB */
 571         ret = kvm_booke206_tlb_init(cpu);
 572         break;
 573     case POWERPC_MMU_2_07:
 574         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 575             /* KVM-HV has transactional memory on POWER8 also without the
 576              * KVM_CAP_PPC_HTM extension, so enable it here instead as
 577              * long as it's availble to userspace on the host. */
 578             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 579                 cap_htm = true;
 580             }
 581         }
 582         break;
 583     default:
 584         break;
 585     }
 586
 587     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 588     kvmppc_hw_debug_points_init(cenv);
 589
 590     return ret;
 591 }
 592
 593 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 594 {
 595     CPUPPCState *env = &cpu->env;
 596     CPUState *cs = CPU(cpu);
 597     struct kvm_dirty_tlb dirty_tlb;
 598     unsigned char *bitmap;
 599     int ret;
 600
 601     if (!env->kvm_sw_tlb) {
 602         return;
 603     }
 604
 605     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 606     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 607
 608     dirty_tlb.bitmap = (uintptr_t)bitmap;
 609     dirty_tlb.num_dirty = env->nb_tlb;
 610
 611     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 612     if (ret) {
 613         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 614                 __func__, strerror(-ret));
 615     }
 616
 617     g_free(bitmap);
 618 }
 619
 620 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 621 {
 622     PowerPCCPU *cpu = POWERPC_CPU(cs);
 623     CPUPPCState *env = &cpu->env;
 624     union {
 625         uint32_t u32;
 626         uint64_t u64;
 627     } val;
 628     struct kvm_one_reg reg = {
 629         .id = id,
 630         .addr = (uintptr_t) &val,
 631     };
 632     int ret;
 633
 634     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 635     if (ret != 0) {
 636         trace_kvm_failed_spr_get(spr, strerror(errno));
 637     } else {
 638         switch (id & KVM_REG_SIZE_MASK) {
 639         case KVM_REG_SIZE_U32:
 640             env->spr[spr] = val.u32;
 641             break;
 642
 643         case KVM_REG_SIZE_U64:
 644             env->spr[spr] = val.u64;
 645             break;
 646
 647         default:
 648             /* Don't handle this size yet */
 649             abort();
 650         }
 651     }
 652 }
 653
 654 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 655 {
 656     PowerPCCPU *cpu = POWERPC_CPU(cs);
 657     CPUPPCState *env = &cpu->env;
 658     union {
 659         uint32_t u32;
 660         uint64_t u64;
 661     } val;
 662     struct kvm_one_reg reg = {
 663         .id = id,
 664         .addr = (uintptr_t) &val,
 665     };
 666     int ret;
 667
 668     switch (id & KVM_REG_SIZE_MASK) {
 669     case KVM_REG_SIZE_U32:
 670         val.u32 = env->spr[spr];
 671         break;
 672
 673     case KVM_REG_SIZE_U64:
 674         val.u64 = env->spr[spr];
 675         break;
 676
 677     default:
 678         /* Don't handle this size yet */
 679         abort();
 680     }
 681
 682     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 683     if (ret != 0) {
 684         trace_kvm_failed_spr_set(spr, strerror(errno));
 685     }
 686 }
 687
 688 static int kvm_put_fp(CPUState *cs)
 689 {
 690     PowerPCCPU *cpu = POWERPC_CPU(cs);
 691     CPUPPCState *env = &cpu->env;
 692     struct kvm_one_reg reg;
 693     int i;
 694     int ret;
 695
 696     if (env->insns_flags & PPC_FLOAT) {
 697         uint64_t fpscr = env->fpscr;
 698         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 699
 700         reg.id = KVM_REG_PPC_FPSCR;
 701         reg.addr = (uintptr_t)&fpscr;
 702         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 703         if (ret < 0) {
 704             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 705             return ret;
 706         }
 707
 708         for (i = 0; i < 32; i++) {
 709             uint64_t vsr[2];
 710
 711 #ifdef HOST_WORDS_BIGENDIAN
 712             vsr[0] = float64_val(env->fpr[i]);
 713             vsr[1] = env->vsr[i];
 714 #else
 715             vsr[0] = env->vsr[i];
 716             vsr[1] = float64_val(env->fpr[i]);
 717 #endif
 718             reg.addr = (uintptr_t) &vsr;
 719             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 720
 721             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 722             if (ret < 0) {
 723                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 724                         i, strerror(errno));
 725                 return ret;
 726             }
 727         }
 728     }
 729
 730     if (env->insns_flags & PPC_ALTIVEC) {
 731         reg.id = KVM_REG_PPC_VSCR;
 732         reg.addr = (uintptr_t)&env->vscr;
 733         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 734         if (ret < 0) {
 735             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 736             return ret;
 737         }
 738
 739         for (i = 0; i < 32; i++) {
 740             reg.id = KVM_REG_PPC_VR(i);
 741             reg.addr = (uintptr_t)&env->avr[i];
 742             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 743             if (ret < 0) {
 744                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 745                 return ret;
 746             }
 747         }
 748     }
 749
 750     return 0;
 751 }
 752
 753 static int kvm_get_fp(CPUState *cs)
 754 {
 755     PowerPCCPU *cpu = POWERPC_CPU(cs);
 756     CPUPPCState *env = &cpu->env;
 757     struct kvm_one_reg reg;
 758     int i;
 759     int ret;
 760
 761     if (env->insns_flags & PPC_FLOAT) {
 762         uint64_t fpscr;
 763         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 764
 765         reg.id = KVM_REG_PPC_FPSCR;
 766         reg.addr = (uintptr_t)&fpscr;
 767         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 768         if (ret < 0) {
 769             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 770             return ret;
 771         } else {
 772             env->fpscr = fpscr;
 773         }
 774
 775         for (i = 0; i < 32; i++) {
 776             uint64_t vsr[2];
 777
 778             reg.addr = (uintptr_t) &vsr;
 779             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 780
 781             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 782             if (ret < 0) {
 783                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 784                         vsx ? "VSR" : "FPR", i, strerror(errno));
 785                 return ret;
 786             } else {
 787 #ifdef HOST_WORDS_BIGENDIAN
 788                 env->fpr[i] = vsr[0];
 789                 if (vsx) {
 790                     env->vsr[i] = vsr[1];
 791                 }
 792 #else
 793                 env->fpr[i] = vsr[1];
 794                 if (vsx) {
 795                     env->vsr[i] = vsr[0];
 796                 }
 797 #endif
 798             }
 799         }
 800     }
 801
 802     if (env->insns_flags & PPC_ALTIVEC) {
 803         reg.id = KVM_REG_PPC_VSCR;
 804         reg.addr = (uintptr_t)&env->vscr;
 805         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 806         if (ret < 0) {
 807             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 808             return ret;
 809         }
 810
 811         for (i = 0; i < 32; i++) {
 812             reg.id = KVM_REG_PPC_VR(i);
 813             reg.addr = (uintptr_t)&env->avr[i];
 814             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 815             if (ret < 0) {
 816                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 817                         i, strerror(errno));
 818                 return ret;
 819             }
 820         }
 821     }
 822
 823     return 0;
 824 }
 825
 826 #if defined(TARGET_PPC64)
 827 static int kvm_get_vpa(CPUState *cs)
 828 {
 829     PowerPCCPU *cpu = POWERPC_CPU(cs);
 830     CPUPPCState *env = &cpu->env;
 831     struct kvm_one_reg reg;
 832     int ret;
 833
 834     reg.id = KVM_REG_PPC_VPA_ADDR;
 835     reg.addr = (uintptr_t)&env->vpa_addr;
 836     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 837     if (ret < 0) {
 838         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 839         return ret;
 840     }
 841
 842     assert((uintptr_t)&env->slb_shadow_size
 843            == ((uintptr_t)&env->slb_shadow_addr + 8));
 844     reg.id = KVM_REG_PPC_VPA_SLB;
 845     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 846     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 847     if (ret < 0) {
 848         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 849                 strerror(errno));
 850         return ret;
 851     }
 852
 853     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 854     reg.id = KVM_REG_PPC_VPA_DTL;
 855     reg.addr = (uintptr_t)&env->dtl_addr;
 856     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 857     if (ret < 0) {
 858         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 859                 strerror(errno));
 860         return ret;
 861     }
 862
 863     return 0;
 864 }
 865
 866 static int kvm_put_vpa(CPUState *cs)
 867 {
 868     PowerPCCPU *cpu = POWERPC_CPU(cs);
 869     CPUPPCState *env = &cpu->env;
 870     struct kvm_one_reg reg;
 871     int ret;
 872
 873     /* SLB shadow or DTL can't be registered unless a master VPA is
 874      * registered.  That means when restoring state, if a VPA *is*
 875      * registered, we need to set that up first.  If not, we need to
 876      * deregister the others before deregistering the master VPA */
 877     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 878
 879     if (env->vpa_addr) {
 880         reg.id = KVM_REG_PPC_VPA_ADDR;
 881         reg.addr = (uintptr_t)&env->vpa_addr;
 882         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 883         if (ret < 0) {
 884             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 885             return ret;
 886         }
 887     }
 888
 889     assert((uintptr_t)&env->slb_shadow_size
 890            == ((uintptr_t)&env->slb_shadow_addr + 8));
 891     reg.id = KVM_REG_PPC_VPA_SLB;
 892     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 893     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 894     if (ret < 0) {
 895         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 896         return ret;
 897     }
 898
 899     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 900     reg.id = KVM_REG_PPC_VPA_DTL;
 901     reg.addr = (uintptr_t)&env->dtl_addr;
 902     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 903     if (ret < 0) {
 904         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 905                 strerror(errno));
 906         return ret;
 907     }
 908
 909     if (!env->vpa_addr) {
 910         reg.id = KVM_REG_PPC_VPA_ADDR;
 911         reg.addr = (uintptr_t)&env->vpa_addr;
 912         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 913         if (ret < 0) {
 914             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 915             return ret;
 916         }
 917     }
 918
 919     return 0;
 920 }
 921 #endif /* TARGET_PPC64 */
 922
 923 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 924 {
 925     CPUPPCState *env = &cpu->env;
 926     struct kvm_sregs sregs;
 927     int i;
 928
 929     sregs.pvr = env->spr[SPR_PVR];
 930
 931     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 932
 933     /* Sync SLB */
 934 #ifdef TARGET_PPC64
 935     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 936         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 937         if (env->slb[i].esid & SLB_ESID_V) {
 938             sregs.u.s.ppc64.slb[i].slbe |= i;
 939         }
 940         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 941     }
 942 #endif
 943
 944     /* Sync SRs */
 945     for (i = 0; i < 16; i++) {
 946         sregs.u.s.ppc32.sr[i] = env->sr[i];
 947     }
 948
 949     /* Sync BATs */
 950     for (i = 0; i < 8; i++) {
 951         /* Beware. We have to swap upper and lower bits here */
 952         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 953             | env->DBAT[1][i];
 954         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 955             | env->IBAT[1][i];
 956     }
 957
 958     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 959 }
 960
 961 int kvm_arch_put_registers(CPUState *cs, int level)
 962 {
 963     PowerPCCPU *cpu = POWERPC_CPU(cs);
 964     CPUPPCState *env = &cpu->env;
 965     struct kvm_regs regs;
 966     int ret;
 967     int i;
 968
 969     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 970     if (ret < 0) {
 971         return ret;
 972     }
 973
 974     regs.ctr = env->ctr;
 975     regs.lr  = env->lr;
 976     regs.xer = cpu_read_xer(env);
 977     regs.msr = env->msr;
 978     regs.pc = env->nip;
 979
 980     regs.srr0 = env->spr[SPR_SRR0];
 981     regs.srr1 = env->spr[SPR_SRR1];
 982
 983     regs.sprg0 = env->spr[SPR_SPRG0];
 984     regs.sprg1 = env->spr[SPR_SPRG1];
 985     regs.sprg2 = env->spr[SPR_SPRG2];
 986     regs.sprg3 = env->spr[SPR_SPRG3];
 987     regs.sprg4 = env->spr[SPR_SPRG4];
 988     regs.sprg5 = env->spr[SPR_SPRG5];
 989     regs.sprg6 = env->spr[SPR_SPRG6];
 990     regs.sprg7 = env->spr[SPR_SPRG7];
 991
 992     regs.pid = env->spr[SPR_BOOKE_PID];
 993
 994     for (i = 0;i < 32; i++)
 995         regs.gpr[i] = env->gpr[i];
 996
 997     regs.cr = 0;
 998     for (i = 0; i < 8; i++) {
 999         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1000     }
1001
1002     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1003     if (ret < 0)
1004         return ret;
1005
1006     kvm_put_fp(cs);
1007
1008     if (env->tlb_dirty) {
1009         kvm_sw_tlb_put(cpu);
1010         env->tlb_dirty = false;
1011     }
1012
1013     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1014         ret = kvmppc_put_books_sregs(cpu);
1015         if (ret < 0) {
1016             return ret;
1017         }
1018     }
1019
1020     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1021         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1022     }
1023
1024     if (cap_one_reg) {
1025         int i;
1026
1027         /* We deliberately ignore errors here, for kernels which have
1028          * the ONE_REG calls, but don't support the specific
1029          * registers, there's a reasonable chance things will still
1030          * work, at least until we try to migrate. */
1031         for (i = 0; i < 1024; i++) {
1032             uint64_t id = env->spr_cb[i].one_reg_id;
1033
1034             if (id != 0) {
1035                 kvm_put_one_spr(cs, id, i);
1036             }
1037         }
1038
1039 #ifdef TARGET_PPC64
1040         if (msr_ts) {
1041             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1042                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1043             }
1044             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1045                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1046             }
1047             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1048             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1049             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1050             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1051             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1052             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1053             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1057         }
1058
1059         if (cap_papr) {
1060             if (kvm_put_vpa(cs) < 0) {
1061                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1062             }
1063         }
1064
1065         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1066 #endif /* TARGET_PPC64 */
1067     }
1068
1069     return ret;
1070 }
1071
1072 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1073 {
1074      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1075 }
1076
1077 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1078 {
1079     CPUPPCState *env = &cpu->env;
1080     struct kvm_sregs sregs;
1081     int ret;
1082
1083     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1084     if (ret < 0) {
1085         return ret;
1086     }
1087
1088     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1089         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1090         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1091         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1092         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1093         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1094         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1095         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1096         env->spr[SPR_DECR] = sregs.u.e.dec;
1097         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1098         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1099         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1100     }
1101
1102     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1103         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1104         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1105         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1106         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1107         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1108     }
1109
1110     if (sregs.u.e.features & KVM_SREGS_E_64) {
1111         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1112     }
1113
1114     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1115         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1116     }
1117
1118     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1119         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1120         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1121         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1122         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1123         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1124         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1125         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1126         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1127         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1128         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1129         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1130         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1131         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1132         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1133         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1134         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1135         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1136         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1137         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1138         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1139         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1140         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1141         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1142         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1143         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1144         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1145         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1146         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1147         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1148         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1149         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1150         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1151
1152         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1153             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1154             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1155             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1156             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1157             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1158             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1159         }
1160
1161         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1162             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1163             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1164         }
1165
1166         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1167             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1168             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1169             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1170             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1171         }
1172     }
1173
1174     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1175         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1176         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1177         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1178         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1179         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1180         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1181         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1182         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1183         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1184         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1185     }
1186
1187     if (sregs.u.e.features & KVM_SREGS_EXP) {
1188         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1189     }
1190
1191     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1192         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1193         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1194     }
1195
1196     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1197         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1198         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1199         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1200
1201         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1202             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1203             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1204         }
1205     }
1206
1207     return 0;
1208 }
1209
1210 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1211 {
1212     CPUPPCState *env = &cpu->env;
1213     struct kvm_sregs sregs;
1214     int ret;
1215     int i;
1216
1217     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1218     if (ret < 0) {
1219         return ret;
1220     }
1221
1222     if (!cpu->vhyp) {
1223         ppc_store_sdr1(env, sregs.u.s.sdr1);
1224     }
1225
1226     /* Sync SLB */
1227 #ifdef TARGET_PPC64
1228     /*
1229      * The packed SLB array we get from KVM_GET_SREGS only contains
1230      * information about valid entries. So we flush our internal copy
1231      * to get rid of stale ones, then put all valid SLB entries back
1232      * in.
1233      */
1234     memset(env->slb, 0, sizeof(env->slb));
1235     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1236         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1237         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1238         /*
1239          * Only restore valid entries
1240          */
1241         if (rb & SLB_ESID_V) {
1242             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1243         }
1244     }
1245 #endif
1246
1247     /* Sync SRs */
1248     for (i = 0; i < 16; i++) {
1249         env->sr[i] = sregs.u.s.ppc32.sr[i];
1250     }
1251
1252     /* Sync BATs */
1253     for (i = 0; i < 8; i++) {
1254         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1255         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1256         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1257         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1258     }
1259
1260     return 0;
1261 }
1262
1263 int kvm_arch_get_registers(CPUState *cs)
1264 {
1265     PowerPCCPU *cpu = POWERPC_CPU(cs);
1266     CPUPPCState *env = &cpu->env;
1267     struct kvm_regs regs;
1268     uint32_t cr;
1269     int i, ret;
1270
1271     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1272     if (ret < 0)
1273         return ret;
1274
1275     cr = regs.cr;
1276     for (i = 7; i >= 0; i--) {
1277         env->crf[i] = cr & 15;
1278         cr >>= 4;
1279     }
1280
1281     env->ctr = regs.ctr;
1282     env->lr = regs.lr;
1283     cpu_write_xer(env, regs.xer);
1284     env->msr = regs.msr;
1285     env->nip = regs.pc;
1286
1287     env->spr[SPR_SRR0] = regs.srr0;
1288     env->spr[SPR_SRR1] = regs.srr1;
1289
1290     env->spr[SPR_SPRG0] = regs.sprg0;
1291     env->spr[SPR_SPRG1] = regs.sprg1;
1292     env->spr[SPR_SPRG2] = regs.sprg2;
1293     env->spr[SPR_SPRG3] = regs.sprg3;
1294     env->spr[SPR_SPRG4] = regs.sprg4;
1295     env->spr[SPR_SPRG5] = regs.sprg5;
1296     env->spr[SPR_SPRG6] = regs.sprg6;
1297     env->spr[SPR_SPRG7] = regs.sprg7;
1298
1299     env->spr[SPR_BOOKE_PID] = regs.pid;
1300
1301     for (i = 0;i < 32; i++)
1302         env->gpr[i] = regs.gpr[i];
1303
1304     kvm_get_fp(cs);
1305
1306     if (cap_booke_sregs) {
1307         ret = kvmppc_get_booke_sregs(cpu);
1308         if (ret < 0) {
1309             return ret;
1310         }
1311     }
1312
1313     if (cap_segstate) {
1314         ret = kvmppc_get_books_sregs(cpu);
1315         if (ret < 0) {
1316             return ret;
1317         }
1318     }
1319
1320     if (cap_hior) {
1321         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1322     }
1323
1324     if (cap_one_reg) {
1325         int i;
1326
1327         /* We deliberately ignore errors here, for kernels which have
1328          * the ONE_REG calls, but don't support the specific
1329          * registers, there's a reasonable chance things will still
1330          * work, at least until we try to migrate. */
1331         for (i = 0; i < 1024; i++) {
1332             uint64_t id = env->spr_cb[i].one_reg_id;
1333
1334             if (id != 0) {
1335                 kvm_get_one_spr(cs, id, i);
1336             }
1337         }
1338
1339 #ifdef TARGET_PPC64
1340         if (msr_ts) {
1341             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1342                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1343             }
1344             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1345                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1346             }
1347             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1348             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1349             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1350             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1351             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1352             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1353             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1357         }
1358
1359         if (cap_papr) {
1360             if (kvm_get_vpa(cs) < 0) {
1361                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1362             }
1363         }
1364
1365         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1366 #endif
1367     }
1368
1369     return 0;
1370 }
1371
1372 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1373 {
1374     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1375
1376     if (irq != PPC_INTERRUPT_EXT) {
1377         return 0;
1378     }
1379
1380     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1381         return 0;
1382     }
1383
1384     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1385
1386     return 0;
1387 }
1388
/*
 * PPC_INPUT_INT names the bit of env->irq_input_state that
 * kvm_arch_pre_run() tests before injecting an interrupt.  Which
 * per-family constant it aliases depends on the build target.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1396
1397 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1398 {
1399     PowerPCCPU *cpu = POWERPC_CPU(cs);
1400     CPUPPCState *env = &cpu->env;
1401     int r;
1402     unsigned irq;
1403
1404     qemu_mutex_lock_iothread();
1405
1406     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1407      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1408     if (!cap_interrupt_level &&
1409         run->ready_for_interrupt_injection &&
1410         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1411         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1412     {
1413         /* For now KVM disregards the 'irq' argument. However, in the
1414          * future KVM could cache it in-kernel to avoid a heavyweight exit
1415          * when reading the UIC.
1416          */
1417         irq = KVM_INTERRUPT_SET;
1418
1419         DPRINTF("injected interrupt %d\n", irq);
1420         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1421         if (r < 0) {
1422             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1423         }
1424
1425         /* Always wake up soon in case the interrupt was level based */
1426         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1427                        (NANOSECONDS_PER_SECOND / 50));
1428     }
1429
1430     /* We don't know if there are more interrupts pending after this. However,
1431      * the guest will return to userspace in the course of handling this one
1432      * anyways, so we will get a chance to deliver the rest. */
1433
1434     qemu_mutex_unlock_iothread();
1435 }
1436
1437 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1438 {
1439     return MEMTXATTRS_UNSPECIFIED;
1440 }
1441
1442 int kvm_arch_process_async_events(CPUState *cs)
1443 {
1444     return cs->halted;
1445 }
1446
1447 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1448 {
1449     CPUState *cs = CPU(cpu);
1450     CPUPPCState *env = &cpu->env;
1451
1452     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1453         cs->halted = 1;
1454         cs->exception_index = EXCP_HLT;
1455     }
1456
1457     return 0;
1458 }
1459
1460 /* map dcr access to existing qemu dcr emulation */
1461 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1462 {
1463     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1464         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1465
1466     return 0;
1467 }
1468
1469 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1470 {
1471     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1472         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1473
1474     return 0;
1475 }
1476
1477 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1478 {
1479     /* Mixed endian case is not handled */
1480     uint32_t sc = debug_inst_opcode;
1481
1482     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1483                             sizeof(sc), 0) ||
1484         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1485         return -EINVAL;
1486     }
1487
1488     return 0;
1489 }
1490
1491 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1492 {
1493     uint32_t sc;
1494
1495     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1496         sc != debug_inst_opcode ||
1497         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1498                             sizeof(sc), 1)) {
1499         return -EINVAL;
1500     }
1501
1502     return 0;
1503 }
1504
1505 static int find_hw_breakpoint(target_ulong addr, int type)
1506 {
1507     int n;
1508
1509     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1510            <= ARRAY_SIZE(hw_debug_points));
1511
1512     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1513         if (hw_debug_points[n].addr == addr &&
1514              hw_debug_points[n].type == type) {
1515             return n;
1516         }
1517     }
1518
1519     return -1;
1520 }
1521
1522 static int find_hw_watchpoint(target_ulong addr, int *flag)
1523 {
1524     int n;
1525
1526     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1527     if (n >= 0) {
1528         *flag = BP_MEM_ACCESS;
1529         return n;
1530     }
1531
1532     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1533     if (n >= 0) {
1534         *flag = BP_MEM_WRITE;
1535         return n;
1536     }
1537
1538     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1539     if (n >= 0) {
1540         *flag = BP_MEM_READ;
1541         return n;
1542     }
1543
1544     return -1;
1545 }
1546
1547 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1548                                   target_ulong len, int type)
1549 {
1550     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1551         return -ENOBUFS;
1552     }
1553
1554     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1555     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1556
1557     switch (type) {
1558     case GDB_BREAKPOINT_HW:
1559         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1560             return -ENOBUFS;
1561         }
1562
1563         if (find_hw_breakpoint(addr, type) >= 0) {
1564             return -EEXIST;
1565         }
1566
1567         nb_hw_breakpoint++;
1568         break;
1569
1570     case GDB_WATCHPOINT_WRITE:
1571     case GDB_WATCHPOINT_READ:
1572     case GDB_WATCHPOINT_ACCESS:
1573         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1574             return -ENOBUFS;
1575         }
1576
1577         if (find_hw_breakpoint(addr, type) >= 0) {
1578             return -EEXIST;
1579         }
1580
1581         nb_hw_watchpoint++;
1582         break;
1583
1584     default:
1585         return -ENOSYS;
1586     }
1587
1588     return 0;
1589 }
1590
1591 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1592                                   target_ulong len, int type)
1593 {
1594     int n;
1595
1596     n = find_hw_breakpoint(addr, type);
1597     if (n < 0) {
1598         return -ENOENT;
1599     }
1600
1601     switch (type) {
1602     case GDB_BREAKPOINT_HW:
1603         nb_hw_breakpoint--;
1604         break;
1605
1606     case GDB_WATCHPOINT_WRITE:
1607     case GDB_WATCHPOINT_READ:
1608     case GDB_WATCHPOINT_ACCESS:
1609         nb_hw_watchpoint--;
1610         break;
1611
1612     default:
1613         return -ENOSYS;
1614     }
1615     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1616
1617     return 0;
1618 }
1619
1620 void kvm_arch_remove_all_hw_breakpoints(void)
1621 {
1622     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1623 }
1624
1625 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1626 {
1627     int n;
1628
1629     /* Software Breakpoint updates */
1630     if (kvm_sw_breakpoints_active(cs)) {
1631         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1632     }
1633
1634     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1635            <= ARRAY_SIZE(hw_debug_points));
1636     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1637
1638     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1639         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1640         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1641         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1642             switch (hw_debug_points[n].type) {
1643             case GDB_BREAKPOINT_HW:
1644                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1645                 break;
1646             case GDB_WATCHPOINT_WRITE:
1647                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1648                 break;
1649             case GDB_WATCHPOINT_READ:
1650                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1651                 break;
1652             case GDB_WATCHPOINT_ACCESS:
1653                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1654                                         KVMPPC_DEBUG_WATCH_READ;
1655                 break;
1656             default:
1657                 cpu_abort(cs, "Unsupported breakpoint type\n");
1658             }
1659             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1660         }
1661     }
1662 }
1663
1664 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1665 {
1666     CPUState *cs = CPU(cpu);
1667     CPUPPCState *env = &cpu->env;
1668     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1669     int handle = 0;
1670     int n;
1671     int flag = 0;
1672
1673     if (cs->singlestep_enabled) {
1674         handle = 1;
1675     } else if (arch_info->status) {
1676         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1677             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1678                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1679                 if (n >= 0) {
1680                     handle = 1;
1681                 }
1682             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1683                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1684                 n = find_hw_watchpoint(arch_info->address,  &flag);
1685                 if (n >= 0) {
1686                     handle = 1;
1687                     cs->watchpoint_hit = &hw_watchpoint;
1688                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1689                     hw_watchpoint.flags = flag;
1690                 }
1691             }
1692         }
1693     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1694         handle = 1;
1695     } else {
1696         /* QEMU is not able to handle debug exception, so inject
1697          * program exception to guest;
1698          * Yes program exception NOT debug exception !!
1699          * When QEMU is using debug resources then debug exception must
1700          * be always set. To achieve this we set MSR_DE and also set
1701          * MSRP_DEP so guest cannot change MSR_DE.
1702          * When emulating debug resource for guest we want guest
1703          * to control MSR_DE (enable/disable debug interrupt on need).
1704          * Supporting both configurations are NOT possible.
1705          * So the result is that we cannot share debug resources
1706          * between QEMU and Guest on BOOKE architecture.
1707          * In the current design QEMU gets the priority over guest,
1708          * this means that if QEMU is using debug resources then guest
1709          * cannot use them;
1710          * For software breakpoint QEMU uses a privileged instruction;
1711          * So there cannot be any reason that we are here for guest
1712          * set debug exception, only possibility is guest executed a
1713          * privileged / illegal instruction and that's why we are
1714          * injecting a program interrupt.
1715          */
1716
1717         cpu_synchronize_state(cs);
1718         /* env->nip is PC, so increment this by 4 to use
1719          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1720          */
1721         env->nip += 4;
1722         cs->exception_index = POWERPC_EXCP_PROGRAM;
1723         env->error_code = POWERPC_EXCP_INVAL;
1724         ppc_cpu_do_interrupt(cs);
1725     }
1726
1727     return handle;
1728 }
1729
1730 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1731 {
1732     PowerPCCPU *cpu = POWERPC_CPU(cs);
1733     CPUPPCState *env = &cpu->env;
1734     int ret;
1735
1736     qemu_mutex_lock_iothread();
1737
1738     switch (run->exit_reason) {
1739     case KVM_EXIT_DCR:
1740         if (run->dcr.is_write) {
1741             DPRINTF("handle dcr write\n");
1742             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1743         } else {
1744             DPRINTF("handle dcr read\n");
1745             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1746         }
1747         break;
1748     case KVM_EXIT_HLT:
1749         DPRINTF("handle halt\n");
1750         ret = kvmppc_handle_halt(cpu);
1751         break;
1752 #if defined(TARGET_PPC64)
1753     case KVM_EXIT_PAPR_HCALL:
1754         DPRINTF("handle PAPR hypercall\n");
1755         run->papr_hcall.ret = spapr_hypercall(cpu,
1756                                               run->papr_hcall.nr,
1757                                               run->papr_hcall.args);
1758         ret = 0;
1759         break;
1760 #endif
1761     case KVM_EXIT_EPR:
1762         DPRINTF("handle epr\n");
1763         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1764         ret = 0;
1765         break;
1766     case KVM_EXIT_WATCHDOG:
1767         DPRINTF("handle watchdog expiry\n");
1768         watchdog_perform_action();
1769         ret = 0;
1770         break;
1771
1772     case KVM_EXIT_DEBUG:
1773         DPRINTF("handle debug exception\n");
1774         if (kvm_handle_debug(cpu, run)) {
1775             ret = EXCP_DEBUG;
1776             break;
1777         }
1778         /* re-enter, this exception was guest-internal */
1779         ret = 0;
1780         break;
1781
1782     default:
1783         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1784         ret = -1;
1785         break;
1786     }
1787
1788     qemu_mutex_unlock_iothread();
1789     return ret;
1790 }
1791
1792 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1793 {
1794     CPUState *cs = CPU(cpu);
1795     uint32_t bits = tsr_bits;
1796     struct kvm_one_reg reg = {
1797         .id = KVM_REG_PPC_OR_TSR,
1798         .addr = (uintptr_t) &bits,
1799     };
1800
1801     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1802 }
1803
1804 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1805 {
1806
1807     CPUState *cs = CPU(cpu);
1808     uint32_t bits = tsr_bits;
1809     struct kvm_one_reg reg = {
1810         .id = KVM_REG_PPC_CLEAR_TSR,
1811         .addr = (uintptr_t) &bits,
1812     };
1813
1814     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1815 }
1816
1817 int kvmppc_set_tcr(PowerPCCPU *cpu)
1818 {
1819     CPUState *cs = CPU(cpu);
1820     CPUPPCState *env = &cpu->env;
1821     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1822
1823     struct kvm_one_reg reg = {
1824         .id = KVM_REG_PPC_TCR,
1825         .addr = (uintptr_t) &tcr,
1826     };
1827
1828     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1829 }
1830
1831 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1832 {
1833     CPUState *cs = CPU(cpu);
1834     int ret;
1835
1836     if (!kvm_enabled()) {
1837         return -1;
1838     }
1839
1840     if (!cap_ppc_watchdog) {
1841         printf("warning: KVM does not support watchdog");
1842         return -1;
1843     }
1844
1845     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1846     if (ret < 0) {
1847         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1848                 __func__, strerror(-ret));
1849         return ret;
1850     }
1851
1852     return ret;
1853 }
1854
/*
 * Scan /proc/cpuinfo for the first line that starts with @field and copy
 * that whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when a matching line was found, -1 when the file could not be
 * opened or no line matched.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char linebuf[512];
    int prefix_len = strlen(field);
    int found = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (fp == NULL) {
        return -1;
    }

    while (fgets(linebuf, sizeof(linebuf), fp)) {
        if (strncmp(linebuf, field, prefix_len) == 0) {
            pstrcpy(value, len, linebuf);
            found = 0;
            break;
        }
        /* Stop scanning on a line that reads back as an empty string */
        if (linebuf[0] == '\0') {
            break;
        }
    }

    fclose(fp);

    return found;
}
1882
1883 uint32_t kvmppc_get_tbfreq(void)
1884 {
1885     char line[512];
1886     char *ns;
1887     uint32_t retval = NANOSECONDS_PER_SECOND;
1888
1889     if (read_cpuinfo("timebase", line, sizeof(line))) {
1890         return retval;
1891     }
1892
1893     if (!(ns = strchr(line, ':'))) {
1894         return retval;
1895     }
1896
1897     ns++;
1898
1899     return atoi(ns);
1900 }
1901
1902 bool kvmppc_get_host_serial(char **value)
1903 {
1904     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1905                                NULL);
1906 }
1907
1908 bool kvmppc_get_host_model(char **value)
1909 {
1910     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1911 }
1912
1913 /* Try to find a device tree node for a CPU with clock-frequency property */
1914 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1915 {
1916     struct dirent *dirp;
1917     DIR *dp;
1918
1919     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1920         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1921         return -1;
1922     }
1923
1924     buf[0] = '\0';
1925     while ((dirp = readdir(dp)) != NULL) {
1926         FILE *f;
1927         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1928                  dirp->d_name);
1929         f = fopen(buf, "r");
1930         if (f) {
1931             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1932             fclose(f);
1933             break;
1934         }
1935         buf[0] = '\0';
1936     }
1937     closedir(dp);
1938     if (buf[0] == '\0') {
1939         printf("Unknown host!\n");
1940         return -1;
1941     }
1942
1943     return 0;
1944 }
1945
/*
 * Read a big-endian integer property from the file @filename.
 *
 * A 4-byte file is decoded as a 32-bit value and an 8-byte file as a
 * 64-bit value.  Returns (uint64_t)-1 if the file cannot be opened and 0
 * if the amount of data read is neither 4 nor 8 bytes.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *fp = fopen(filename, "rb");

    if (fp == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1972
1973 /* Read a CPU node property from the host device tree that's a single
1974  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1975  * (can't find or open the property, or doesn't understand the
1976  * format) */
1977 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1978 {
1979     char buf[PATH_MAX], *tmp;
1980     uint64_t val;
1981
1982     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1983         return -1;
1984     }
1985
1986     tmp = g_strdup_printf("%s/%s", buf, propname);
1987     val = kvmppc_read_int_dt(tmp);
1988     g_free(tmp);
1989
1990     return val;
1991 }
1992
/* Return the host CPU node's "clock-frequency" device tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1997
/*
 * Return the host CPU node's "ibm,vmx" device tree property, truncated to
 * 32 bits (so a (uint64_t)-1 failure value reads back as (uint32_t)-1).
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,vmx");

    return raw;
}
2002
/*
 * Return the host CPU node's "ibm,dfp" device tree property, truncated to
 * 32 bits (so a (uint64_t)-1 failure value reads back as (uint32_t)-1).
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,dfp");

    return raw;
}
2007
2008 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2009  {
2010      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2011      CPUState *cs = CPU(cpu);
2012
2013     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2014         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2015         return 0;
2016     }
2017
2018     return 1;
2019 }
2020
2021 int kvmppc_get_hasidle(CPUPPCState *env)
2022 {
2023     struct kvm_ppc_pvinfo pvinfo;
2024
2025     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2026         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2027         return 1;
2028     }
2029
2030     return 0;
2031 }
2032
2033 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2034 {
2035     uint32_t *hc = (uint32_t*)buf;
2036     struct kvm_ppc_pvinfo pvinfo;
2037
2038     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2039         memcpy(buf, pvinfo.hcall, buf_len);
2040         return 0;
2041     }
2042
2043     /*
2044      * Fallback to always fail hypercalls regardless of endianness:
2045      *
2046      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2047      *     li r3, -1
2048      *     b .+8       (becomes nop in wrong endian)
2049      *     bswap32(li r3, -1)
2050      */
2051
2052     hc[0] = cpu_to_be32(0x08000048);
2053     hc[1] = cpu_to_be32(0x3860ffff);
2054     hc[2] = cpu_to_be32(0x48000008);
2055     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2056
2057     return 1;
2058 }
2059
2060 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2061 {
2062     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2063 }
2064
2065 void kvmppc_enable_logical_ci_hcalls(void)
2066 {
2067     /*
2068      * FIXME: it would be nice if we could detect the cases where
2069      * we're using a device which requires the in kernel
2070      * implementation of these hcalls, but the kernel lacks them and
2071      * produce a warning.
2072      */
2073     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2074     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2075 }
2076
2077 void kvmppc_enable_set_mode_hcall(void)
2078 {
2079     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2080 }
2081
2082 void kvmppc_enable_clear_ref_mod_hcalls(void)
2083 {
2084     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2085     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2086 }
2087
2088 void kvmppc_set_papr(PowerPCCPU *cpu)
2089 {
2090     CPUState *cs = CPU(cpu);
2091     int ret;
2092
2093     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2094     if (ret) {
2095         error_report("This vCPU type or KVM version does not support PAPR");
2096         exit(1);
2097     }
2098
2099     /* Update the capability flag so we sync the right information
2100      * with kvm */
2101     cap_papr = 1;
2102 }
2103
2104 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2105 {
2106     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2107 }
2108
2109 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2110 {
2111     CPUState *cs = CPU(cpu);
2112     int ret;
2113
2114     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2115     if (ret && mpic_proxy) {
2116         error_report("This KVM version does not support EPR");
2117         exit(1);
2118     }
2119 }
2120
2121 int kvmppc_smt_threads(void)
2122 {
2123     return cap_ppc_smt ? cap_ppc_smt : 1;
2124 }
2125
2126 #ifdef TARGET_PPC64
2127 off_t kvmppc_alloc_rma(void **rma)
2128 {
2129     off_t size;
2130     int fd;
2131     struct kvm_allocate_rma ret;
2132
2133     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2134      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2135      *                      not necessary on this hardware
2136      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2137      *
2138      * FIXME: We should allow the user to force contiguous RMA
2139      * allocation in the cap_ppc_rma==1 case.
2140      */
2141     if (cap_ppc_rma < 2) {
2142         return 0;
2143     }
2144
2145     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2146     if (fd < 0) {
2147         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2148                 strerror(errno));
2149         return -1;
2150     }
2151
2152     size = MIN(ret.rma_size, 256ul << 20);
2153
2154     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2155     if (*rma == MAP_FAILED) {
2156         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2157         return -1;
2158     };
2159
2160     return size;
2161 }
2162
2163 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2164 {
2165     struct kvm_ppc_smmu_info info;
2166     long rampagesize, best_page_shift;
2167     int i;
2168
2169     if (cap_ppc_rma >= 2) {
2170         return current_size;
2171     }
2172
2173     /* Find the largest hardware supported page size that's less than
2174      * or equal to the (logical) backing page size of guest RAM */
2175     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2176     rampagesize = qemu_getrampagesize();
2177     best_page_shift = 0;
2178
2179     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2180         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2181
2182         if (!sps->page_shift) {
2183             continue;
2184         }
2185
2186         if ((sps->page_shift > best_page_shift)
2187             && ((1UL << sps->page_shift) <= rampagesize)) {
2188             best_page_shift = sps->page_shift;
2189         }
2190     }
2191
2192     return MIN(current_size,
2193                1ULL << (best_page_shift + hash_shift - 7));
2194 }
2195 #endif
2196
2197 bool kvmppc_spapr_use_multitce(void)
2198 {
2199     return cap_spapr_multitce;
2200 }
2201
2202 int kvmppc_spapr_enable_inkernel_multitce(void)
2203 {
2204     int ret;
2205
2206     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2207                             H_PUT_TCE_INDIRECT, 1);
2208     if (!ret) {
2209         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2210                                 H_STUFF_TCE, 1);
2211     }
2212
2213     return ret;
2214 }
2215
2216 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2217                               uint64_t bus_offset, uint32_t nb_table,
2218                               int *pfd, bool need_vfio)
2219 {
2220     long len;
2221     int fd;
2222     void *table;
2223
2224     /* Must set fd to -1 so we don't try to munmap when called for
2225      * destroying the table, which the upper layers -will- do
2226      */
2227     *pfd = -1;
2228     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2229         return NULL;
2230     }
2231
2232     if (cap_spapr_tce_64) {
2233         struct kvm_create_spapr_tce_64 args = {
2234             .liobn = liobn,
2235             .page_shift = page_shift,
2236             .offset = bus_offset >> page_shift,
2237             .size = nb_table,
2238             .flags = 0
2239         };
2240         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2241         if (fd < 0) {
2242             fprintf(stderr,
2243                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2244                     liobn);
2245             return NULL;
2246         }
2247     } else if (cap_spapr_tce) {
2248         uint64_t window_size = (uint64_t) nb_table << page_shift;
2249         struct kvm_create_spapr_tce args = {
2250             .liobn = liobn,
2251             .window_size = window_size,
2252         };
2253         if ((window_size != args.window_size) || bus_offset) {
2254             return NULL;
2255         }
2256         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2257         if (fd < 0) {
2258             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2259                     liobn);
2260             return NULL;
2261         }
2262     } else {
2263         return NULL;
2264     }
2265
2266     len = nb_table * sizeof(uint64_t);
2267     /* FIXME: round this up to page size */
2268
2269     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2270     if (table == MAP_FAILED) {
2271         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2272                 liobn);
2273         close(fd);
2274         return NULL;
2275     }
2276
2277     *pfd = fd;
2278     return table;
2279 }
2280
/*
 * Tear down a TCE table previously set up with an fd: unmap @table
 * (@nb_table 64-bit entries) and close @fd.
 *
 * Returns -1 without doing anything when @fd is negative, 0 otherwise.
 * A failure of munmap() or close() is reported on stderr but still
 * yields 0; close() is not attempted if munmap() fails.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2299
2300 int kvmppc_reset_htab(int shift_hint)
2301 {
2302     uint32_t shift = shift_hint;
2303
2304     if (!kvm_enabled()) {
2305         /* Full emulation, tell caller to allocate htab itself */
2306         return 0;
2307     }
2308     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2309         int ret;
2310         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2311         if (ret == -ENOTTY) {
2312             /* At least some versions of PR KVM advertise the
2313              * capability, but don't implement the ioctl().  Oops.
2314              * Return 0 so that we allocate the htab in qemu, as is
2315              * correct for PR. */
2316             return 0;
2317         } else if (ret < 0) {
2318             return ret;
2319         }
2320         return shift;
2321     }
2322
2323     /* We have a kernel that predates the htab reset calls.  For PR
2324      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2325      * this era, it has allocated a 16MB fixed size hash table already. */
2326     if (kvmppc_is_pr(kvm_state)) {
2327         /* PR - tell caller to allocate htab */
2328         return 0;
2329     } else {
2330         /* HV - assume 16MB kernel allocated htab */
2331         return 24;
2332     }
2333 }
2334
/*
 * Execute the "mfpvr" instruction on the host and return the value it
 * places in the output register.
 */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    /* %0 is bound to pvr as a write-only general register operand */
    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
2343
/*
 * Set (@on true) or clear (@on false) the bits of @flags in *@word,
 * leaving all other bits untouched.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2352
2353 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2354 {
2355     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2356     uint32_t vmx = kvmppc_get_vmx();
2357     uint32_t dfp = kvmppc_get_dfp();
2358     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2359     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2360
2361     /* Now fix up the class with information we can query from the host */
2362     pcc->pvr = mfpvr();
2363
2364     if (vmx != -1) {
2365         /* Only override when we know what the host supports */
2366         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2367         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2368     }
2369     if (dfp != -1) {
2370         /* Only override when we know what the host supports */
2371         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2372     }
2373
2374     if (dcache_size != -1) {
2375         pcc->l1_dcache_size = dcache_size;
2376     }
2377
2378     if (icache_size != -1) {
2379         pcc->l1_icache_size = icache_size;
2380     }
2381
2382 #if defined(TARGET_PPC64)
2383     pcc->radix_page_info = kvm_get_radix_page_info();
2384
2385     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2386         /*
2387          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2388          * compliant.  More importantly, advertising ISA 3.00
2389          * architected mode may prevent guests from activating
2390          * necessary DD1 workarounds.
2391          */
2392         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2393                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2394     }
2395 #endif /* defined(TARGET_PPC64) */
2396 }
2397
2398 bool kvmppc_has_cap_epr(void)
2399 {
2400     return cap_epr;
2401 }
2402
2403 bool kvmppc_has_cap_htab_fd(void)
2404 {
2405     return cap_htab_fd;
2406 }
2407
2408 bool kvmppc_has_cap_fixup_hcalls(void)
2409 {
2410     return cap_fixup_hcalls;
2411 }
2412
2413 bool kvmppc_has_cap_htm(void)
2414 {
2415     return cap_htm;
2416 }
2417
2418 bool kvmppc_has_cap_mmu_radix(void)
2419 {
2420     return cap_mmu_radix;
2421 }
2422
2423 bool kvmppc_has_cap_mmu_hash_v3(void)
2424 {
2425     return cap_mmu_hash_v3;
2426 }
2427
2428 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2429 {
2430     uint32_t host_pvr = mfpvr();
2431     PowerPCCPUClass *pvr_pcc;
2432
2433     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2434     if (pvr_pcc == NULL) {
2435         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2436     }
2437
2438     return pvr_pcc;
2439 }
2440
2441 static int kvm_ppc_register_host_cpu_type(void)
2442 {
2443     TypeInfo type_info = {
2444         .name = TYPE_HOST_POWERPC_CPU,
2445         .class_init = kvmppc_host_cpu_class_init,
2446     };
2447     PowerPCCPUClass *pvr_pcc;
2448     DeviceClass *dc;
2449     int i;
2450
2451     pvr_pcc = kvm_ppc_get_host_cpu_class();
2452     if (pvr_pcc == NULL) {
2453         return -1;
2454     }
2455     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2456     type_register(&type_info);
2457
2458 #if defined(TARGET_PPC64)
2459     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2460     type_info.parent = TYPE_SPAPR_CPU_CORE,
2461     type_info.instance_size = sizeof(sPAPRCPUCore);
2462     type_info.instance_init = NULL;
2463     type_info.class_init = spapr_cpu_core_class_init;
2464     type_info.class_data = (void *) "host";
2465     type_register(&type_info);
2466     g_free((void *)type_info.name);
2467 #endif
2468
2469     /*
2470      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2471      * we want "POWER8" to be a "family" alias that points to the current
2472      * host CPU type, too)
2473      */
2474     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2475     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2476         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2477             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2478             char *suffix;
2479
2480             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2481             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2482             if (suffix) {
2483                 *suffix = 0;
2484             }
2485             ppc_cpu_aliases[i].oc = oc;
2486             break;
2487         }
2488     }
2489
2490     return 0;
2491 }
2492
2493 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2494 {
2495     struct kvm_rtas_token_args args = {
2496         .token = token,
2497     };
2498
2499     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2500         return -ENOENT;
2501     }
2502
2503     strncpy(args.name, function, sizeof(args.name));
2504
2505     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2506 }
2507
2508 int kvmppc_get_htab_fd(bool write)
2509 {
2510     struct kvm_get_htab_fd s = {
2511         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2512         .start_index = 0,
2513     };
2514
2515     if (!cap_htab_fd) {
2516         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2517         return -1;
2518     }
2519
2520     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2521 }
2522
2523 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2524 {
2525     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2526     uint8_t buf[bufsize];
2527     ssize_t rc;
2528
2529     do {
2530         rc = read(fd, buf, bufsize);
2531         if (rc < 0) {
2532             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2533                     strerror(errno));
2534             return rc;
2535         } else if (rc) {
2536             uint8_t *buffer = buf;
2537             ssize_t n = rc;
2538             while (n) {
2539                 struct kvm_get_htab_header *head =
2540                     (struct kvm_get_htab_header *) buffer;
2541                 size_t chunksize = sizeof(*head) +
2542                      HASH_PTE_SIZE_64 * head->n_valid;
2543
2544                 qemu_put_be32(f, head->index);
2545                 qemu_put_be16(f, head->n_valid);
2546                 qemu_put_be16(f, head->n_invalid);
2547                 qemu_put_buffer(f, (void *)(head + 1),
2548                                 HASH_PTE_SIZE_64 * head->n_valid);
2549
2550                 buffer += chunksize;
2551                 n -= chunksize;
2552             }
2553         }
2554     } while ((rc != 0)
2555              && ((max_ns < 0)
2556                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2557
2558     return (rc == 0) ? 1 : 0;
2559 }
2560
2561 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2562                            uint16_t n_valid, uint16_t n_invalid)
2563 {
2564     struct kvm_get_htab_header *buf;
2565     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2566     ssize_t rc;
2567
2568     buf = alloca(chunksize);
2569     buf->index = index;
2570     buf->n_valid = n_valid;
2571     buf->n_invalid = n_invalid;
2572
2573     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2574
2575     rc = write(fd, buf, chunksize);
2576     if (rc < 0) {
2577         fprintf(stderr, "Error writing KVM hash table: %s\n",
2578                 strerror(errno));
2579         return rc;
2580     }
2581     if (rc != chunksize) {
2582         /* We should never get a short write on a single chunk */
2583         fprintf(stderr, "Short write, restoring KVM hash table\n");
2584         return -1;
2585     }
2586     return 0;
2587 }
2588
2589 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2590 {
2591     return true;
2592 }
2593
2594 void kvm_arch_init_irq_routing(KVMState *s)
2595 {
2596 }
2597
2598 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2599 {
2600     struct kvm_get_htab_fd ghf = {
2601         .flags = 0,
2602         .start_index = ptex,
2603     };
2604     int fd, rc;
2605     int i;
2606
2607     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2608     if (fd < 0) {
2609         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2610     }
2611
2612     i = 0;
2613     while (i < n) {
2614         struct kvm_get_htab_header *hdr;
2615         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2616         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2617
2618         rc = read(fd, buf, sizeof(buf));
2619         if (rc < 0) {
2620             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2621         }
2622
2623         hdr = (struct kvm_get_htab_header *)buf;
2624         while ((i < n) && ((char *)hdr < (buf + rc))) {
2625             int invalid = hdr->n_invalid;
2626
2627             if (hdr->index != (ptex + i)) {
2628                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2629                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2630             }
2631
2632             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2633             i += hdr->n_valid;
2634
2635             if ((n - i) < invalid) {
2636                 invalid = n - i;
2637             }
2638             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2639             i += hdr->n_invalid;
2640
2641             hdr = (struct kvm_get_htab_header *)
2642                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2643         }
2644     }
2645
2646     close(fd);
2647 }
2648
2649 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2650 {
2651     int fd, rc;
2652     struct kvm_get_htab_fd ghf;
2653     struct {
2654         struct kvm_get_htab_header hdr;
2655         uint64_t pte0;
2656         uint64_t pte1;
2657     } buf;
2658
2659     ghf.flags = 0;
2660     ghf.start_index = 0;     /* Ignored */
2661     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2662     if (fd < 0) {
2663         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2664     }
2665
2666     buf.hdr.n_valid = 1;
2667     buf.hdr.n_invalid = 0;
2668     buf.hdr.index = ptex;
2669     buf.pte0 = cpu_to_be64(pte0);
2670     buf.pte1 = cpu_to_be64(pte1);
2671
2672     rc = write(fd, &buf, sizeof(buf));
2673     if (rc != sizeof(buf)) {
2674         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2675     }
2676     close(fd);
2677 }
2678
2679 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2680                              uint64_t address, uint32_t data, PCIDevice *dev)
2681 {
2682     return 0;
2683 }
2684
2685 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2686                                 int vector, PCIDevice *dev)
2687 {
2688     return 0;
2689 }
2690
/* Nothing is done here; @virq is ignored.  Returns 0. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
2695
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t low16 = data & 0xffff;

    return low16;
}
2700
2701 int kvmppc_enable_hwrng(void)
2702 {
2703     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2704         return -1;
2705     }
2706
2707     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2708 }