target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qemu/error-report.h"
  26 #include "cpu.h"
  27 #include "cpu-models.h"
  28 #include "qemu/timer.h"
  29 #include "sysemu/sysemu.h"
  30 #include "sysemu/hw_accel.h"
  31 #include "kvm_ppc.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/spapr_cpu_core.h"
  40 #include "hw/ppc/ppc.h"
  41 #include "sysemu/watchdog.h"
  42 #include "trace.h"
  43 #include "exec/gdbstub.h"
  44 #include "exec/memattrs.h"
  45 #include "exec/ram_addr.h"
  46 #include "sysemu/hostmem.h"
  47 #include "qemu/cutils.h"
  48 #include "qemu/mmap-alloc.h"
  49 #if defined(TARGET_PPC64)
  50 #include "hw/ppc/spapr_cpu_core.h"
  51 #endif
  52 #include "elf.h"
  53 #include "sysemu/kvm_int.h"
  54
  55 //#define DEBUG_KVM
  56
  57 #ifdef DEBUG_KVM
  58 #define DPRINTF(fmt, ...) \
  59     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  60 #else
  61 #define DPRINTF(fmt, ...) \
  62     do { } while (0)
  63 #endif
  64
  65 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  66
  67 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  68     KVM_CAP_LAST_INFO
  69 };
  70
  71 static int cap_interrupt_unset = false;
  72 static int cap_interrupt_level = false;
  73 static int cap_segstate;
  74 static int cap_booke_sregs;
  75 static int cap_ppc_smt;
  76 static int cap_ppc_rma;
  77 static int cap_spapr_tce;
  78 static int cap_spapr_tce_64;
  79 static int cap_spapr_multitce;
  80 static int cap_spapr_vfio;
  81 static int cap_hior;
  82 static int cap_one_reg;
  83 static int cap_epr;
  84 static int cap_ppc_watchdog;
  85 static int cap_papr;
  86 static int cap_htab_fd;
  87 static int cap_fixup_hcalls;
  88 static int cap_htm;             /* Hardware transactional memory support */
  89 static int cap_mmu_radix;
  90 static int cap_mmu_hash_v3;
  91
  92 static uint32_t debug_inst_opcode;
  93
  94 /* XXX We have a race condition where we actually have a level triggered
  95  *     interrupt, but the infrastructure can't expose that yet, so the guest
  96  *     takes but ignores it, goes to sleep and never gets notified that there's
  97  *     still an interrupt pending.
  98  *
  99  *     As a quick workaround, let's just wake up again 20 ms after we injected
 100  *     an interrupt. That way we can assure that we're always reinjecting
 101  *     interrupts in case the guest swallowed them.
 102  */
 103 static QEMUTimer *idle_timer;
 104
 105 static void kvm_kick_cpu(void *opaque)
 106 {
 107     PowerPCCPU *cpu = opaque;
 108
 109     qemu_cpu_kick(CPU(cpu));
 110 }
 111
 112 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 113  * should only be used for fallback tests - generally we should use
 114  * explicit capabilities for the features we want, rather than
 115  * assuming what is/isn't available depending on the KVM variant. */
 116 static bool kvmppc_is_pr(KVMState *ks)
 117 {
 118     /* Assume KVM-PR if the GET_PVINFO capability is available */
 119     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 120 }
 121
 122 static int kvm_ppc_register_host_cpu_type(void);
 123
 124 int kvm_arch_init(MachineState *ms, KVMState *s)
 125 {
 126     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 127     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 128     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 129     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 130     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 131     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 132     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 133     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 134     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 135     cap_spapr_vfio = false;
 136     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 137     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 138     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 139     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 140     /* Note: we don't set cap_papr here, because this capability is
 141      * only activated after this by kvmppc_set_papr() */
 142     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 143     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 144     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 145     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 146     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 147
 148     if (!cap_interrupt_level) {
 149         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 150                         "VM to stall at times!\n");
 151     }
 152
 153     kvm_ppc_register_host_cpu_type();
 154
 155     return 0;
 156 }
 157
 158 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 159 {
 160     return 0;
 161 }
 162
 163 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 164 {
 165     CPUPPCState *cenv = &cpu->env;
 166     CPUState *cs = CPU(cpu);
 167     struct kvm_sregs sregs;
 168     int ret;
 169
 170     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 171         /* What we're really trying to say is "if we're on BookE, we use
 172            the native PVR for now". This is the only sane way to check
 173            it though, so we potentially confuse users that they can run
 174            BookE guests on BookS. Let's hope nobody dares enough :) */
 175         return 0;
 176     } else {
 177         if (!cap_segstate) {
 178             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 179             return -ENOSYS;
 180         }
 181     }
 182
 183     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 184     if (ret) {
 185         return ret;
 186     }
 187
 188     sregs.pvr = cenv->spr[SPR_PVR];
 189     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 190 }
 191
 192 /* Set up a shared TLB array with KVM */
 193 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 194 {
 195     CPUPPCState *env = &cpu->env;
 196     CPUState *cs = CPU(cpu);
 197     struct kvm_book3e_206_tlb_params params = {};
 198     struct kvm_config_tlb cfg = {};
 199     unsigned int entries = 0;
 200     int ret, i;
 201
 202     if (!kvm_enabled() ||
 203         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 204         return 0;
 205     }
 206
 207     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 208
 209     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 210         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 211         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 212         entries += params.tlb_sizes[i];
 213     }
 214
 215     assert(entries == env->nb_tlb);
 216     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 217
 218     env->tlb_dirty = true;
 219
 220     cfg.array = (uintptr_t)env->tlb.tlbm;
 221     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 222     cfg.params = (uintptr_t)&params;
 223     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 224
 225     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 226     if (ret < 0) {
 227         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 228                 __func__, strerror(-ret));
 229         return ret;
 230     }
 231
 232     env->kvm_sw_tlb = true;
 233     return 0;
 234 }
 235
 236
 237 #if defined(TARGET_PPC64)
 238 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 239                                        struct kvm_ppc_smmu_info *info)
 240 {
 241     CPUPPCState *env = &cpu->env;
 242     CPUState *cs = CPU(cpu);
 243
 244     memset(info, 0, sizeof(*info));
 245
 246     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 247      * need to "guess" what the supported page sizes are.
 248      *
 249      * For that to work we make a few assumptions:
 250      *
 251      * - Check whether we are running "PR" KVM which only supports 4K
 252      *   and 16M pages, but supports them regardless of the backing
 253      *   store characteritics. We also don't support 1T segments.
 254      *
 255      *   This is safe as if HV KVM ever supports that capability or PR
 256      *   KVM grows supports for more page/segment sizes, those versions
 257      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 258      *   will not hit this fallback
 259      *
 260      * - Else we are running HV KVM. This means we only support page
 261      *   sizes that fit in the backing store. Additionally we only
 262      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 263      *   P7 encodings for the SLB and hash table. Here too, we assume
 264      *   support for any newer processor will mean a kernel that
 265      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 266      *   this fallback.
 267      */
 268     if (kvmppc_is_pr(cs->kvm_state)) {
 269         /* No flags */
 270         info->flags = 0;
 271         info->slb_size = 64;
 272
 273         /* Standard 4k base page size segment */
 274         info->sps[0].page_shift = 12;
 275         info->sps[0].slb_enc = 0;
 276         info->sps[0].enc[0].page_shift = 12;
 277         info->sps[0].enc[0].pte_enc = 0;
 278
 279         /* Standard 16M large page size segment */
 280         info->sps[1].page_shift = 24;
 281         info->sps[1].slb_enc = SLB_VSID_L;
 282         info->sps[1].enc[0].page_shift = 24;
 283         info->sps[1].enc[0].pte_enc = 0;
 284     } else {
 285         int i = 0;
 286
 287         /* HV KVM has backing store size restrictions */
 288         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 289
 290         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 291             info->flags |= KVM_PPC_1T_SEGMENTS;
 292         }
 293
 294         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 295            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 296             info->slb_size = 32;
 297         } else {
 298             info->slb_size = 64;
 299         }
 300
 301         /* Standard 4k base page size segment */
 302         info->sps[i].page_shift = 12;
 303         info->sps[i].slb_enc = 0;
 304         info->sps[i].enc[0].page_shift = 12;
 305         info->sps[i].enc[0].pte_enc = 0;
 306         i++;
 307
 308         /* 64K on MMU 2.06 and later */
 309         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 310             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 311             info->sps[i].page_shift = 16;
 312             info->sps[i].slb_enc = 0x110;
 313             info->sps[i].enc[0].page_shift = 16;
 314             info->sps[i].enc[0].pte_enc = 1;
 315             i++;
 316         }
 317
 318         /* Standard 16M large page size segment */
 319         info->sps[i].page_shift = 24;
 320         info->sps[i].slb_enc = SLB_VSID_L;
 321         info->sps[i].enc[0].page_shift = 24;
 322         info->sps[i].enc[0].pte_enc = 0;
 323     }
 324 }
 325
 326 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 327 {
 328     CPUState *cs = CPU(cpu);
 329     int ret;
 330
 331     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 332         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 333         if (ret == 0) {
 334             return;
 335         }
 336     }
 337
 338     kvm_get_fallback_smmu_info(cpu, info);
 339 }
 340
 341 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 342 {
 343     KVMState *s = KVM_STATE(current_machine->accelerator);
 344     struct ppc_radix_page_info *radix_page_info;
 345     struct kvm_ppc_rmmu_info rmmu_info;
 346     int i;
 347
 348     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 349         return NULL;
 350     }
 351     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 352         return NULL;
 353     }
 354     radix_page_info = g_malloc0(sizeof(*radix_page_info));
 355     radix_page_info->count = 0;
 356     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 357         if (rmmu_info.ap_encodings[i]) {
 358             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 359             radix_page_info->count++;
 360         }
 361     }
 362     return radix_page_info;
 363 }
 364
 365 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 366                                      bool radix, bool gtse,
 367                                      uint64_t proc_tbl)
 368 {
 369     CPUState *cs = CPU(cpu);
 370     int ret;
 371     uint64_t flags = 0;
 372     struct kvm_ppc_mmuv3_cfg cfg = {
 373         .process_table = proc_tbl,
 374     };
 375
 376     if (radix) {
 377         flags |= KVM_PPC_MMUV3_RADIX;
 378     }
 379     if (gtse) {
 380         flags |= KVM_PPC_MMUV3_GTSE;
 381     }
 382     cfg.flags = flags;
 383     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 384     switch (ret) {
 385     case 0:
 386         return H_SUCCESS;
 387     case -EINVAL:
 388         return H_PARAMETER;
 389     case -ENODEV:
 390         return H_NOT_AVAILABLE;
 391     default:
 392         return H_HARDWARE;
 393     }
 394 }
 395
 396 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 397 {
 398     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 399         return true;
 400     }
 401
 402     return (1ul << shift) <= rampgsize;
 403 }
 404
 405 static long max_cpu_page_size;
 406
 407 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 408 {
 409     static struct kvm_ppc_smmu_info smmu_info;
 410     static bool has_smmu_info;
 411     CPUPPCState *env = &cpu->env;
 412     int iq, ik, jq, jk;
 413     bool has_64k_pages = false;
 414
 415     /* We only handle page sizes for 64-bit server guests for now */
 416     if (!(env->mmu_model & POWERPC_MMU_64)) {
 417         return;
 418     }
 419
 420     /* Collect MMU info from kernel if not already */
 421     if (!has_smmu_info) {
 422         kvm_get_smmu_info(cpu, &smmu_info);
 423         has_smmu_info = true;
 424     }
 425
 426     if (!max_cpu_page_size) {
 427         max_cpu_page_size = qemu_getrampagesize();
 428     }
 429
 430     /* Convert to QEMU form */
 431     memset(&env->sps, 0, sizeof(env->sps));
 432
 433     /* If we have HV KVM, we need to forbid CI large pages if our
 434      * host page size is smaller than 64K.
 435      */
 436     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 437         env->ci_large_pages = getpagesize() >= 0x10000;
 438     }
 439
 440     /*
 441      * XXX This loop should be an entry wide AND of the capabilities that
 442      *     the selected CPU has with the capabilities that KVM supports.
 443      */
 444     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 445         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 446         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 447
 448         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 449                                  ksps->page_shift)) {
 450             continue;
 451         }
 452         qsps->page_shift = ksps->page_shift;
 453         qsps->slb_enc = ksps->slb_enc;
 454         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 455             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 456                                      ksps->enc[jk].page_shift)) {
 457                 continue;
 458             }
 459             if (ksps->enc[jk].page_shift == 16) {
 460                 has_64k_pages = true;
 461             }
 462             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 463             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 464             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 465                 break;
 466             }
 467         }
 468         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 469             break;
 470         }
 471     }
 472     env->slb_nr = smmu_info.slb_size;
 473     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 474         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 475     }
 476     if (!has_64k_pages) {
 477         env->mmu_model &= ~POWERPC_MMU_64K;
 478     }
 479 }
 480
 481 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
 482 {
 483     Object *mem_obj = object_resolve_path(obj_path, NULL);
 484     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
 485     long pagesize;
 486
 487     if (mempath) {
 488         pagesize = qemu_mempath_getpagesize(mempath);
 489     } else {
 490         pagesize = getpagesize();
 491     }
 492
 493     return pagesize >= max_cpu_page_size;
 494 }
 495
 496 #else /* defined (TARGET_PPC64) */
 497
 498 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 499 {
 500 }
 501
 502 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
 503 {
 504     return true;
 505 }
 506
 507 #endif /* !defined (TARGET_PPC64) */
 508
 509 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 510 {
 511     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 512 }
 513
 514 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 515  * book3s supports only 1 watchpoint, so array size
 516  * of 4 is sufficient for now.
 517  */
 518 #define MAX_HW_BKPTS 4
 519
 520 static struct HWBreakpoint {
 521     target_ulong addr;
 522     int type;
 523 } hw_debug_points[MAX_HW_BKPTS];
 524
 525 static CPUWatchpoint hw_watchpoint;
 526
 527 /* Default there is no breakpoint and watchpoint supported */
 528 static int max_hw_breakpoint;
 529 static int max_hw_watchpoint;
 530 static int nb_hw_breakpoint;
 531 static int nb_hw_watchpoint;
 532
 533 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 534 {
 535     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 536         max_hw_breakpoint = 2;
 537         max_hw_watchpoint = 2;
 538     }
 539
 540     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 541         fprintf(stderr, "Error initializing h/w breakpoints\n");
 542         return;
 543     }
 544 }
 545
 546 int kvm_arch_init_vcpu(CPUState *cs)
 547 {
 548     PowerPCCPU *cpu = POWERPC_CPU(cs);
 549     CPUPPCState *cenv = &cpu->env;
 550     int ret;
 551
 552     /* Gather server mmu info from KVM and update the CPU state */
 553     kvm_fixup_page_sizes(cpu);
 554
 555     /* Synchronize sregs with kvm */
 556     ret = kvm_arch_sync_sregs(cpu);
 557     if (ret) {
 558         if (ret == -EINVAL) {
 559             error_report("Register sync failed... If you're using kvm-hv.ko,"
 560                          " only \"-cpu host\" is possible");
 561         }
 562         return ret;
 563     }
 564
 565     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 566
 567     switch (cenv->mmu_model) {
 568     case POWERPC_MMU_BOOKE206:
 569         /* This target supports access to KVM's guest TLB */
 570         ret = kvm_booke206_tlb_init(cpu);
 571         break;
 572     case POWERPC_MMU_2_07:
 573         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 574             /* KVM-HV has transactional memory on POWER8 also without the
 575              * KVM_CAP_PPC_HTM extension, so enable it here instead as
 576              * long as it's availble to userspace on the host. */
 577             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 578                 cap_htm = true;
 579             }
 580         }
 581         break;
 582     default:
 583         break;
 584     }
 585
 586     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 587     kvmppc_hw_debug_points_init(cenv);
 588
 589     return ret;
 590 }
 591
 592 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 593 {
 594     CPUPPCState *env = &cpu->env;
 595     CPUState *cs = CPU(cpu);
 596     struct kvm_dirty_tlb dirty_tlb;
 597     unsigned char *bitmap;
 598     int ret;
 599
 600     if (!env->kvm_sw_tlb) {
 601         return;
 602     }
 603
 604     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 605     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 606
 607     dirty_tlb.bitmap = (uintptr_t)bitmap;
 608     dirty_tlb.num_dirty = env->nb_tlb;
 609
 610     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 611     if (ret) {
 612         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 613                 __func__, strerror(-ret));
 614     }
 615
 616     g_free(bitmap);
 617 }
 618
 619 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 620 {
 621     PowerPCCPU *cpu = POWERPC_CPU(cs);
 622     CPUPPCState *env = &cpu->env;
 623     union {
 624         uint32_t u32;
 625         uint64_t u64;
 626     } val;
 627     struct kvm_one_reg reg = {
 628         .id = id,
 629         .addr = (uintptr_t) &val,
 630     };
 631     int ret;
 632
 633     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 634     if (ret != 0) {
 635         trace_kvm_failed_spr_get(spr, strerror(errno));
 636     } else {
 637         switch (id & KVM_REG_SIZE_MASK) {
 638         case KVM_REG_SIZE_U32:
 639             env->spr[spr] = val.u32;
 640             break;
 641
 642         case KVM_REG_SIZE_U64:
 643             env->spr[spr] = val.u64;
 644             break;
 645
 646         default:
 647             /* Don't handle this size yet */
 648             abort();
 649         }
 650     }
 651 }
 652
 653 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 654 {
 655     PowerPCCPU *cpu = POWERPC_CPU(cs);
 656     CPUPPCState *env = &cpu->env;
 657     union {
 658         uint32_t u32;
 659         uint64_t u64;
 660     } val;
 661     struct kvm_one_reg reg = {
 662         .id = id,
 663         .addr = (uintptr_t) &val,
 664     };
 665     int ret;
 666
 667     switch (id & KVM_REG_SIZE_MASK) {
 668     case KVM_REG_SIZE_U32:
 669         val.u32 = env->spr[spr];
 670         break;
 671
 672     case KVM_REG_SIZE_U64:
 673         val.u64 = env->spr[spr];
 674         break;
 675
 676     default:
 677         /* Don't handle this size yet */
 678         abort();
 679     }
 680
 681     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 682     if (ret != 0) {
 683         trace_kvm_failed_spr_set(spr, strerror(errno));
 684     }
 685 }
 686
 687 static int kvm_put_fp(CPUState *cs)
 688 {
 689     PowerPCCPU *cpu = POWERPC_CPU(cs);
 690     CPUPPCState *env = &cpu->env;
 691     struct kvm_one_reg reg;
 692     int i;
 693     int ret;
 694
 695     if (env->insns_flags & PPC_FLOAT) {
 696         uint64_t fpscr = env->fpscr;
 697         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 698
 699         reg.id = KVM_REG_PPC_FPSCR;
 700         reg.addr = (uintptr_t)&fpscr;
 701         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 702         if (ret < 0) {
 703             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 704             return ret;
 705         }
 706
 707         for (i = 0; i < 32; i++) {
 708             uint64_t vsr[2];
 709
 710 #ifdef HOST_WORDS_BIGENDIAN
 711             vsr[0] = float64_val(env->fpr[i]);
 712             vsr[1] = env->vsr[i];
 713 #else
 714             vsr[0] = env->vsr[i];
 715             vsr[1] = float64_val(env->fpr[i]);
 716 #endif
 717             reg.addr = (uintptr_t) &vsr;
 718             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 719
 720             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 721             if (ret < 0) {
 722                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 723                         i, strerror(errno));
 724                 return ret;
 725             }
 726         }
 727     }
 728
 729     if (env->insns_flags & PPC_ALTIVEC) {
 730         reg.id = KVM_REG_PPC_VSCR;
 731         reg.addr = (uintptr_t)&env->vscr;
 732         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 733         if (ret < 0) {
 734             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 735             return ret;
 736         }
 737
 738         for (i = 0; i < 32; i++) {
 739             reg.id = KVM_REG_PPC_VR(i);
 740             reg.addr = (uintptr_t)&env->avr[i];
 741             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 742             if (ret < 0) {
 743                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 744                 return ret;
 745             }
 746         }
 747     }
 748
 749     return 0;
 750 }
 751
 752 static int kvm_get_fp(CPUState *cs)
 753 {
 754     PowerPCCPU *cpu = POWERPC_CPU(cs);
 755     CPUPPCState *env = &cpu->env;
 756     struct kvm_one_reg reg;
 757     int i;
 758     int ret;
 759
 760     if (env->insns_flags & PPC_FLOAT) {
 761         uint64_t fpscr;
 762         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 763
 764         reg.id = KVM_REG_PPC_FPSCR;
 765         reg.addr = (uintptr_t)&fpscr;
 766         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 767         if (ret < 0) {
 768             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 769             return ret;
 770         } else {
 771             env->fpscr = fpscr;
 772         }
 773
 774         for (i = 0; i < 32; i++) {
 775             uint64_t vsr[2];
 776
 777             reg.addr = (uintptr_t) &vsr;
 778             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 779
 780             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 781             if (ret < 0) {
 782                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 783                         vsx ? "VSR" : "FPR", i, strerror(errno));
 784                 return ret;
 785             } else {
 786 #ifdef HOST_WORDS_BIGENDIAN
 787                 env->fpr[i] = vsr[0];
 788                 if (vsx) {
 789                     env->vsr[i] = vsr[1];
 790                 }
 791 #else
 792                 env->fpr[i] = vsr[1];
 793                 if (vsx) {
 794                     env->vsr[i] = vsr[0];
 795                 }
 796 #endif
 797             }
 798         }
 799     }
 800
 801     if (env->insns_flags & PPC_ALTIVEC) {
 802         reg.id = KVM_REG_PPC_VSCR;
 803         reg.addr = (uintptr_t)&env->vscr;
 804         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 805         if (ret < 0) {
 806             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 807             return ret;
 808         }
 809
 810         for (i = 0; i < 32; i++) {
 811             reg.id = KVM_REG_PPC_VR(i);
 812             reg.addr = (uintptr_t)&env->avr[i];
 813             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 814             if (ret < 0) {
 815                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 816                         i, strerror(errno));
 817                 return ret;
 818             }
 819         }
 820     }
 821
 822     return 0;
 823 }
 824
 825 #if defined(TARGET_PPC64)
 826 static int kvm_get_vpa(CPUState *cs)
 827 {
 828     PowerPCCPU *cpu = POWERPC_CPU(cs);
 829     CPUPPCState *env = &cpu->env;
 830     struct kvm_one_reg reg;
 831     int ret;
 832
 833     reg.id = KVM_REG_PPC_VPA_ADDR;
 834     reg.addr = (uintptr_t)&env->vpa_addr;
 835     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 836     if (ret < 0) {
 837         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 838         return ret;
 839     }
 840
 841     assert((uintptr_t)&env->slb_shadow_size
 842            == ((uintptr_t)&env->slb_shadow_addr + 8));
 843     reg.id = KVM_REG_PPC_VPA_SLB;
 844     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 845     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 846     if (ret < 0) {
 847         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 848                 strerror(errno));
 849         return ret;
 850     }
 851
 852     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 853     reg.id = KVM_REG_PPC_VPA_DTL;
 854     reg.addr = (uintptr_t)&env->dtl_addr;
 855     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 856     if (ret < 0) {
 857         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 858                 strerror(errno));
 859         return ret;
 860     }
 861
 862     return 0;
 863 }
 864
 865 static int kvm_put_vpa(CPUState *cs)
 866 {
 867     PowerPCCPU *cpu = POWERPC_CPU(cs);
 868     CPUPPCState *env = &cpu->env;
 869     struct kvm_one_reg reg;
 870     int ret;
 871
 872     /* SLB shadow or DTL can't be registered unless a master VPA is
 873      * registered.  That means when restoring state, if a VPA *is*
 874      * registered, we need to set that up first.  If not, we need to
 875      * deregister the others before deregistering the master VPA */
 876     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 877
 878     if (env->vpa_addr) {
 879         reg.id = KVM_REG_PPC_VPA_ADDR;
 880         reg.addr = (uintptr_t)&env->vpa_addr;
 881         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 882         if (ret < 0) {
 883             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 884             return ret;
 885         }
 886     }
 887
 888     assert((uintptr_t)&env->slb_shadow_size
 889            == ((uintptr_t)&env->slb_shadow_addr + 8));
 890     reg.id = KVM_REG_PPC_VPA_SLB;
 891     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 892     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 893     if (ret < 0) {
 894         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 895         return ret;
 896     }
 897
 898     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 899     reg.id = KVM_REG_PPC_VPA_DTL;
 900     reg.addr = (uintptr_t)&env->dtl_addr;
 901     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 902     if (ret < 0) {
 903         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 904                 strerror(errno));
 905         return ret;
 906     }
 907
 908     if (!env->vpa_addr) {
 909         reg.id = KVM_REG_PPC_VPA_ADDR;
 910         reg.addr = (uintptr_t)&env->vpa_addr;
 911         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 912         if (ret < 0) {
 913             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 914             return ret;
 915         }
 916     }
 917
 918     return 0;
 919 }
 920 #endif /* TARGET_PPC64 */
 921
 922 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 923 {
 924     CPUPPCState *env = &cpu->env;
 925     struct kvm_sregs sregs;
 926     int i;
 927
 928     sregs.pvr = env->spr[SPR_PVR];
 929
 930     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 931
 932     /* Sync SLB */
 933 #ifdef TARGET_PPC64
 934     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 935         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 936         if (env->slb[i].esid & SLB_ESID_V) {
 937             sregs.u.s.ppc64.slb[i].slbe |= i;
 938         }
 939         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 940     }
 941 #endif
 942
 943     /* Sync SRs */
 944     for (i = 0; i < 16; i++) {
 945         sregs.u.s.ppc32.sr[i] = env->sr[i];
 946     }
 947
 948     /* Sync BATs */
 949     for (i = 0; i < 8; i++) {
 950         /* Beware. We have to swap upper and lower bits here */
 951         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 952             | env->DBAT[1][i];
 953         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 954             | env->IBAT[1][i];
 955     }
 956
 957     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 958 }
 959
 960 int kvm_arch_put_registers(CPUState *cs, int level)
 961 {
 962     PowerPCCPU *cpu = POWERPC_CPU(cs);
 963     CPUPPCState *env = &cpu->env;
 964     struct kvm_regs regs;
 965     int ret;
 966     int i;
 967
 968     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 969     if (ret < 0) {
 970         return ret;
 971     }
 972
 973     regs.ctr = env->ctr;
 974     regs.lr  = env->lr;
 975     regs.xer = cpu_read_xer(env);
 976     regs.msr = env->msr;
 977     regs.pc = env->nip;
 978
 979     regs.srr0 = env->spr[SPR_SRR0];
 980     regs.srr1 = env->spr[SPR_SRR1];
 981
 982     regs.sprg0 = env->spr[SPR_SPRG0];
 983     regs.sprg1 = env->spr[SPR_SPRG1];
 984     regs.sprg2 = env->spr[SPR_SPRG2];
 985     regs.sprg3 = env->spr[SPR_SPRG3];
 986     regs.sprg4 = env->spr[SPR_SPRG4];
 987     regs.sprg5 = env->spr[SPR_SPRG5];
 988     regs.sprg6 = env->spr[SPR_SPRG6];
 989     regs.sprg7 = env->spr[SPR_SPRG7];
 990
 991     regs.pid = env->spr[SPR_BOOKE_PID];
 992
 993     for (i = 0;i < 32; i++)
 994         regs.gpr[i] = env->gpr[i];
 995
 996     regs.cr = 0;
 997     for (i = 0; i < 8; i++) {
 998         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 999     }
1000
1001     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1002     if (ret < 0)
1003         return ret;
1004
1005     kvm_put_fp(cs);
1006
1007     if (env->tlb_dirty) {
1008         kvm_sw_tlb_put(cpu);
1009         env->tlb_dirty = false;
1010     }
1011
1012     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1013         ret = kvmppc_put_books_sregs(cpu);
1014         if (ret < 0) {
1015             return ret;
1016         }
1017     }
1018
1019     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1020         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1021     }
1022
1023     if (cap_one_reg) {
1024         int i;
1025
1026         /* We deliberately ignore errors here, for kernels which have
1027          * the ONE_REG calls, but don't support the specific
1028          * registers, there's a reasonable chance things will still
1029          * work, at least until we try to migrate. */
1030         for (i = 0; i < 1024; i++) {
1031             uint64_t id = env->spr_cb[i].one_reg_id;
1032
1033             if (id != 0) {
1034                 kvm_put_one_spr(cs, id, i);
1035             }
1036         }
1037
1038 #ifdef TARGET_PPC64
1039         if (msr_ts) {
1040             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1041                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1042             }
1043             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1044                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1045             }
1046             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1047             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1048             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1049             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1050             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1051             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1052             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1053             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1056         }
1057
1058         if (cap_papr) {
1059             if (kvm_put_vpa(cs) < 0) {
1060                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1061             }
1062         }
1063
1064         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1065 #endif /* TARGET_PPC64 */
1066     }
1067
1068     return ret;
1069 }
1070
1071 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1072 {
1073      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1074 }
1075
1076 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1077 {
1078     CPUPPCState *env = &cpu->env;
1079     struct kvm_sregs sregs;
1080     int ret;
1081
1082     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1083     if (ret < 0) {
1084         return ret;
1085     }
1086
1087     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1088         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1089         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1090         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1091         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1092         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1093         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1094         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1095         env->spr[SPR_DECR] = sregs.u.e.dec;
1096         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1097         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1098         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1099     }
1100
1101     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1102         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1103         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1104         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1105         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1106         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1107     }
1108
1109     if (sregs.u.e.features & KVM_SREGS_E_64) {
1110         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1111     }
1112
1113     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1114         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1115     }
1116
1117     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1118         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1119         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1120         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1121         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1122         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1123         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1124         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1125         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1126         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1127         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1128         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1129         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1130         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1131         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1132         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1133         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1134         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1135         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1136         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1137         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1138         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1139         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1140         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1141         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1142         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1143         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1144         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1145         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1146         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1147         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1148         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1149         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1150
1151         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1152             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1153             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1154             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1155             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1156             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1157             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1158         }
1159
1160         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1161             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1162             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1163         }
1164
1165         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1166             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1167             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1168             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1169             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1170         }
1171     }
1172
1173     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1174         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1175         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1176         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1177         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1178         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1179         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1180         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1181         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1182         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1183         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1184     }
1185
1186     if (sregs.u.e.features & KVM_SREGS_EXP) {
1187         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1188     }
1189
1190     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1191         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1192         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1193     }
1194
1195     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1196         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1197         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1198         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1199
1200         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1201             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1202             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1203         }
1204     }
1205
1206     return 0;
1207 }
1208
1209 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1210 {
1211     CPUPPCState *env = &cpu->env;
1212     struct kvm_sregs sregs;
1213     int ret;
1214     int i;
1215
1216     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1217     if (ret < 0) {
1218         return ret;
1219     }
1220
1221     if (!cpu->vhyp) {
1222         ppc_store_sdr1(env, sregs.u.s.sdr1);
1223     }
1224
1225     /* Sync SLB */
1226 #ifdef TARGET_PPC64
1227     /*
1228      * The packed SLB array we get from KVM_GET_SREGS only contains
1229      * information about valid entries. So we flush our internal copy
1230      * to get rid of stale ones, then put all valid SLB entries back
1231      * in.
1232      */
1233     memset(env->slb, 0, sizeof(env->slb));
1234     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1235         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1236         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1237         /*
1238          * Only restore valid entries
1239          */
1240         if (rb & SLB_ESID_V) {
1241             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1242         }
1243     }
1244 #endif
1245
1246     /* Sync SRs */
1247     for (i = 0; i < 16; i++) {
1248         env->sr[i] = sregs.u.s.ppc32.sr[i];
1249     }
1250
1251     /* Sync BATs */
1252     for (i = 0; i < 8; i++) {
1253         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1254         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1255         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1256         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1257     }
1258
1259     return 0;
1260 }
1261
1262 int kvm_arch_get_registers(CPUState *cs)
1263 {
1264     PowerPCCPU *cpu = POWERPC_CPU(cs);
1265     CPUPPCState *env = &cpu->env;
1266     struct kvm_regs regs;
1267     uint32_t cr;
1268     int i, ret;
1269
1270     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1271     if (ret < 0)
1272         return ret;
1273
1274     cr = regs.cr;
1275     for (i = 7; i >= 0; i--) {
1276         env->crf[i] = cr & 15;
1277         cr >>= 4;
1278     }
1279
1280     env->ctr = regs.ctr;
1281     env->lr = regs.lr;
1282     cpu_write_xer(env, regs.xer);
1283     env->msr = regs.msr;
1284     env->nip = regs.pc;
1285
1286     env->spr[SPR_SRR0] = regs.srr0;
1287     env->spr[SPR_SRR1] = regs.srr1;
1288
1289     env->spr[SPR_SPRG0] = regs.sprg0;
1290     env->spr[SPR_SPRG1] = regs.sprg1;
1291     env->spr[SPR_SPRG2] = regs.sprg2;
1292     env->spr[SPR_SPRG3] = regs.sprg3;
1293     env->spr[SPR_SPRG4] = regs.sprg4;
1294     env->spr[SPR_SPRG5] = regs.sprg5;
1295     env->spr[SPR_SPRG6] = regs.sprg6;
1296     env->spr[SPR_SPRG7] = regs.sprg7;
1297
1298     env->spr[SPR_BOOKE_PID] = regs.pid;
1299
1300     for (i = 0;i < 32; i++)
1301         env->gpr[i] = regs.gpr[i];
1302
1303     kvm_get_fp(cs);
1304
1305     if (cap_booke_sregs) {
1306         ret = kvmppc_get_booke_sregs(cpu);
1307         if (ret < 0) {
1308             return ret;
1309         }
1310     }
1311
1312     if (cap_segstate) {
1313         ret = kvmppc_get_books_sregs(cpu);
1314         if (ret < 0) {
1315             return ret;
1316         }
1317     }
1318
1319     if (cap_hior) {
1320         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1321     }
1322
1323     if (cap_one_reg) {
1324         int i;
1325
1326         /* We deliberately ignore errors here, for kernels which have
1327          * the ONE_REG calls, but don't support the specific
1328          * registers, there's a reasonable chance things will still
1329          * work, at least until we try to migrate. */
1330         for (i = 0; i < 1024; i++) {
1331             uint64_t id = env->spr_cb[i].one_reg_id;
1332
1333             if (id != 0) {
1334                 kvm_get_one_spr(cs, id, i);
1335             }
1336         }
1337
1338 #ifdef TARGET_PPC64
1339         if (msr_ts) {
1340             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1341                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1342             }
1343             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1344                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1345             }
1346             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1347             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1348             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1349             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1350             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1351             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1352             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1353             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1356         }
1357
1358         if (cap_papr) {
1359             if (kvm_get_vpa(cs) < 0) {
1360                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1361             }
1362         }
1363
1364         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1365 #endif
1366     }
1367
1368     return 0;
1369 }
1370
1371 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1372 {
1373     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1374
1375     if (irq != PPC_INTERRUPT_EXT) {
1376         return 0;
1377     }
1378
1379     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1380         return 0;
1381     }
1382
1383     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1384
1385     return 0;
1386 }
1387
/*
 * PPC_INPUT_INT names the input-pin constant used for the interrupt line
 * of the CPU family selected at build time.
 */
#ifdef TARGET_PPCEMB
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1395
1396 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1397 {
1398     PowerPCCPU *cpu = POWERPC_CPU(cs);
1399     CPUPPCState *env = &cpu->env;
1400     int r;
1401     unsigned irq;
1402
1403     qemu_mutex_lock_iothread();
1404
1405     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1406      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1407     if (!cap_interrupt_level &&
1408         run->ready_for_interrupt_injection &&
1409         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1410         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1411     {
1412         /* For now KVM disregards the 'irq' argument. However, in the
1413          * future KVM could cache it in-kernel to avoid a heavyweight exit
1414          * when reading the UIC.
1415          */
1416         irq = KVM_INTERRUPT_SET;
1417
1418         DPRINTF("injected interrupt %d\n", irq);
1419         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1420         if (r < 0) {
1421             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1422         }
1423
1424         /* Always wake up soon in case the interrupt was level based */
1425         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1426                        (NANOSECONDS_PER_SECOND / 50));
1427     }
1428
1429     /* We don't know if there are more interrupts pending after this. However,
1430      * the guest will return to userspace in the course of handling this one
1431      * anyways, so we will get a chance to deliver the rest. */
1432
1433     qemu_mutex_unlock_iothread();
1434 }
1435
1436 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1437 {
1438     return MEMTXATTRS_UNSPECIFIED;
1439 }
1440
1441 int kvm_arch_process_async_events(CPUState *cs)
1442 {
1443     return cs->halted;
1444 }
1445
1446 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1447 {
1448     CPUState *cs = CPU(cpu);
1449     CPUPPCState *env = &cpu->env;
1450
1451     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1452         cs->halted = 1;
1453         cs->exception_index = EXCP_HLT;
1454     }
1455
1456     return 0;
1457 }
1458
1459 /* map dcr access to existing qemu dcr emulation */
1460 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1461 {
1462     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1463         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1464
1465     return 0;
1466 }
1467
1468 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1469 {
1470     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1471         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1472
1473     return 0;
1474 }
1475
1476 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1477 {
1478     /* Mixed endian case is not handled */
1479     uint32_t sc = debug_inst_opcode;
1480
1481     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1482                             sizeof(sc), 0) ||
1483         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1484         return -EINVAL;
1485     }
1486
1487     return 0;
1488 }
1489
1490 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1491 {
1492     uint32_t sc;
1493
1494     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1495         sc != debug_inst_opcode ||
1496         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1497                             sizeof(sc), 1)) {
1498         return -EINVAL;
1499     }
1500
1501     return 0;
1502 }
1503
1504 static int find_hw_breakpoint(target_ulong addr, int type)
1505 {
1506     int n;
1507
1508     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1509            <= ARRAY_SIZE(hw_debug_points));
1510
1511     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1512         if (hw_debug_points[n].addr == addr &&
1513              hw_debug_points[n].type == type) {
1514             return n;
1515         }
1516     }
1517
1518     return -1;
1519 }
1520
1521 static int find_hw_watchpoint(target_ulong addr, int *flag)
1522 {
1523     int n;
1524
1525     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1526     if (n >= 0) {
1527         *flag = BP_MEM_ACCESS;
1528         return n;
1529     }
1530
1531     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1532     if (n >= 0) {
1533         *flag = BP_MEM_WRITE;
1534         return n;
1535     }
1536
1537     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1538     if (n >= 0) {
1539         *flag = BP_MEM_READ;
1540         return n;
1541     }
1542
1543     return -1;
1544 }
1545
1546 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1547                                   target_ulong len, int type)
1548 {
1549     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1550         return -ENOBUFS;
1551     }
1552
1553     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1554     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1555
1556     switch (type) {
1557     case GDB_BREAKPOINT_HW:
1558         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1559             return -ENOBUFS;
1560         }
1561
1562         if (find_hw_breakpoint(addr, type) >= 0) {
1563             return -EEXIST;
1564         }
1565
1566         nb_hw_breakpoint++;
1567         break;
1568
1569     case GDB_WATCHPOINT_WRITE:
1570     case GDB_WATCHPOINT_READ:
1571     case GDB_WATCHPOINT_ACCESS:
1572         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1573             return -ENOBUFS;
1574         }
1575
1576         if (find_hw_breakpoint(addr, type) >= 0) {
1577             return -EEXIST;
1578         }
1579
1580         nb_hw_watchpoint++;
1581         break;
1582
1583     default:
1584         return -ENOSYS;
1585     }
1586
1587     return 0;
1588 }
1589
1590 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1591                                   target_ulong len, int type)
1592 {
1593     int n;
1594
1595     n = find_hw_breakpoint(addr, type);
1596     if (n < 0) {
1597         return -ENOENT;
1598     }
1599
1600     switch (type) {
1601     case GDB_BREAKPOINT_HW:
1602         nb_hw_breakpoint--;
1603         break;
1604
1605     case GDB_WATCHPOINT_WRITE:
1606     case GDB_WATCHPOINT_READ:
1607     case GDB_WATCHPOINT_ACCESS:
1608         nb_hw_watchpoint--;
1609         break;
1610
1611     default:
1612         return -ENOSYS;
1613     }
1614     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1615
1616     return 0;
1617 }
1618
1619 void kvm_arch_remove_all_hw_breakpoints(void)
1620 {
1621     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1622 }
1623
1624 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1625 {
1626     int n;
1627
1628     /* Software Breakpoint updates */
1629     if (kvm_sw_breakpoints_active(cs)) {
1630         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1631     }
1632
1633     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1634            <= ARRAY_SIZE(hw_debug_points));
1635     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1636
1637     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1638         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1639         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1640         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1641             switch (hw_debug_points[n].type) {
1642             case GDB_BREAKPOINT_HW:
1643                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1644                 break;
1645             case GDB_WATCHPOINT_WRITE:
1646                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1647                 break;
1648             case GDB_WATCHPOINT_READ:
1649                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1650                 break;
1651             case GDB_WATCHPOINT_ACCESS:
1652                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1653                                         KVMPPC_DEBUG_WATCH_READ;
1654                 break;
1655             default:
1656                 cpu_abort(cs, "Unsupported breakpoint type\n");
1657             }
1658             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1659         }
1660     }
1661 }
1662
1663 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1664 {
1665     CPUState *cs = CPU(cpu);
1666     CPUPPCState *env = &cpu->env;
1667     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1668     int handle = 0;
1669     int n;
1670     int flag = 0;
1671
1672     if (cs->singlestep_enabled) {
1673         handle = 1;
1674     } else if (arch_info->status) {
1675         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1676             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1677                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1678                 if (n >= 0) {
1679                     handle = 1;
1680                 }
1681             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1682                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1683                 n = find_hw_watchpoint(arch_info->address,  &flag);
1684                 if (n >= 0) {
1685                     handle = 1;
1686                     cs->watchpoint_hit = &hw_watchpoint;
1687                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1688                     hw_watchpoint.flags = flag;
1689                 }
1690             }
1691         }
1692     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1693         handle = 1;
1694     } else {
1695         /* QEMU is not able to handle debug exception, so inject
1696          * program exception to guest;
1697          * Yes program exception NOT debug exception !!
1698          * When QEMU is using debug resources then debug exception must
1699          * be always set. To achieve this we set MSR_DE and also set
1700          * MSRP_DEP so guest cannot change MSR_DE.
1701          * When emulating debug resource for guest we want guest
1702          * to control MSR_DE (enable/disable debug interrupt on need).
1703          * Supporting both configurations are NOT possible.
1704          * So the result is that we cannot share debug resources
1705          * between QEMU and Guest on BOOKE architecture.
1706          * In the current design QEMU gets the priority over guest,
1707          * this means that if QEMU is using debug resources then guest
1708          * cannot use them;
1709          * For software breakpoint QEMU uses a privileged instruction;
1710          * So there cannot be any reason that we are here for guest
1711          * set debug exception, only possibility is guest executed a
1712          * privileged / illegal instruction and that's why we are
1713          * injecting a program interrupt.
1714          */
1715
1716         cpu_synchronize_state(cs);
1717         /* env->nip is PC, so increment this by 4 to use
1718          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1719          */
1720         env->nip += 4;
1721         cs->exception_index = POWERPC_EXCP_PROGRAM;
1722         env->error_code = POWERPC_EXCP_INVAL;
1723         ppc_cpu_do_interrupt(cs);
1724     }
1725
1726     return handle;
1727 }
1728
1729 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1730 {
1731     PowerPCCPU *cpu = POWERPC_CPU(cs);
1732     CPUPPCState *env = &cpu->env;
1733     int ret;
1734
1735     qemu_mutex_lock_iothread();
1736
1737     switch (run->exit_reason) {
1738     case KVM_EXIT_DCR:
1739         if (run->dcr.is_write) {
1740             DPRINTF("handle dcr write\n");
1741             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1742         } else {
1743             DPRINTF("handle dcr read\n");
1744             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1745         }
1746         break;
1747     case KVM_EXIT_HLT:
1748         DPRINTF("handle halt\n");
1749         ret = kvmppc_handle_halt(cpu);
1750         break;
1751 #if defined(TARGET_PPC64)
1752     case KVM_EXIT_PAPR_HCALL:
1753         DPRINTF("handle PAPR hypercall\n");
1754         run->papr_hcall.ret = spapr_hypercall(cpu,
1755                                               run->papr_hcall.nr,
1756                                               run->papr_hcall.args);
1757         ret = 0;
1758         break;
1759 #endif
1760     case KVM_EXIT_EPR:
1761         DPRINTF("handle epr\n");
1762         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1763         ret = 0;
1764         break;
1765     case KVM_EXIT_WATCHDOG:
1766         DPRINTF("handle watchdog expiry\n");
1767         watchdog_perform_action();
1768         ret = 0;
1769         break;
1770
1771     case KVM_EXIT_DEBUG:
1772         DPRINTF("handle debug exception\n");
1773         if (kvm_handle_debug(cpu, run)) {
1774             ret = EXCP_DEBUG;
1775             break;
1776         }
1777         /* re-enter, this exception was guest-internal */
1778         ret = 0;
1779         break;
1780
1781     default:
1782         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1783         ret = -1;
1784         break;
1785     }
1786
1787     qemu_mutex_unlock_iothread();
1788     return ret;
1789 }
1790
1791 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1792 {
1793     CPUState *cs = CPU(cpu);
1794     uint32_t bits = tsr_bits;
1795     struct kvm_one_reg reg = {
1796         .id = KVM_REG_PPC_OR_TSR,
1797         .addr = (uintptr_t) &bits,
1798     };
1799
1800     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1801 }
1802
1803 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1804 {
1805
1806     CPUState *cs = CPU(cpu);
1807     uint32_t bits = tsr_bits;
1808     struct kvm_one_reg reg = {
1809         .id = KVM_REG_PPC_CLEAR_TSR,
1810         .addr = (uintptr_t) &bits,
1811     };
1812
1813     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1814 }
1815
1816 int kvmppc_set_tcr(PowerPCCPU *cpu)
1817 {
1818     CPUState *cs = CPU(cpu);
1819     CPUPPCState *env = &cpu->env;
1820     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1821
1822     struct kvm_one_reg reg = {
1823         .id = KVM_REG_PPC_TCR,
1824         .addr = (uintptr_t) &tcr,
1825     };
1826
1827     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1828 }
1829
1830 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1831 {
1832     CPUState *cs = CPU(cpu);
1833     int ret;
1834
1835     if (!kvm_enabled()) {
1836         return -1;
1837     }
1838
1839     if (!cap_ppc_watchdog) {
1840         printf("warning: KVM does not support watchdog");
1841         return -1;
1842     }
1843
1844     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1845     if (ret < 0) {
1846         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1847                 __func__, strerror(-ret));
1848         return ret;
1849     }
1850
1851     return ret;
1852 }
1853
/*
 * Scan /proc/cpuinfo for the first line that begins with @field and
 * copy that whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when a matching line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char text[512];
    int prefix_len = strlen(field);
    int status = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    for (;;) {
        if (fgets(text, sizeof(text), cpuinfo) == NULL) {
            break;
        }
        if (strncmp(text, field, prefix_len) == 0) {
            pstrcpy(value, len, text);
            status = 0;
            break;
        }
        /* Stop if fgets() handed back an empty string */
        if (text[0] == '\0') {
            break;
        }
    }

    fclose(cpuinfo);

    return status;
}
1881
1882 uint32_t kvmppc_get_tbfreq(void)
1883 {
1884     char line[512];
1885     char *ns;
1886     uint32_t retval = NANOSECONDS_PER_SECOND;
1887
1888     if (read_cpuinfo("timebase", line, sizeof(line))) {
1889         return retval;
1890     }
1891
1892     if (!(ns = strchr(line, ':'))) {
1893         return retval;
1894     }
1895
1896     ns++;
1897
1898     return atoi(ns);
1899 }
1900
1901 bool kvmppc_get_host_serial(char **value)
1902 {
1903     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1904                                NULL);
1905 }
1906
1907 bool kvmppc_get_host_model(char **value)
1908 {
1909     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1910 }
1911
1912 /* Try to find a device tree node for a CPU with clock-frequency property */
1913 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1914 {
1915     struct dirent *dirp;
1916     DIR *dp;
1917
1918     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1919         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1920         return -1;
1921     }
1922
1923     buf[0] = '\0';
1924     while ((dirp = readdir(dp)) != NULL) {
1925         FILE *f;
1926         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1927                  dirp->d_name);
1928         f = fopen(buf, "r");
1929         if (f) {
1930             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1931             fclose(f);
1932             break;
1933         }
1934         buf[0] = '\0';
1935     }
1936     closedir(dp);
1937     if (buf[0] == '\0') {
1938         printf("Unknown host!\n");
1939         return -1;
1940     }
1941
1942     return 0;
1943 }
1944
/*
 * Read a big-endian integer property from the file @filename.
 *
 * A 4-byte file is decoded as a 32-bit value and an 8-byte file as a
 * 64-bit value.  Returns (uint64_t)-1 if the file cannot be opened and
 * 0 if the amount of data read is neither 4 nor 8 bytes.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    FILE *prop = fopen(filename, "rb");
    int nread;

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1971
1972 /* Read a CPU node property from the host device tree that's a single
1973  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1974  * (can't find or open the property, or doesn't understand the
1975  * format) */
1976 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1977 {
1978     char buf[PATH_MAX], *tmp;
1979     uint64_t val;
1980
1981     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1982         return -1;
1983     }
1984
1985     tmp = g_strdup_printf("%s/%s", buf, propname);
1986     val = kvmppc_read_int_dt(tmp);
1987     g_free(tmp);
1988
1989     return val;
1990 }
1991
/* Return the host CPU node's "clock-frequency" device tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1996
/*
 * Return the host CPU node's "ibm,vmx" device tree property, truncated
 * to 32 bits.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
2001
/*
 * Return the host CPU node's "ibm,dfp" device tree property, truncated
 * to 32 bits.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
2006
2007 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2008  {
2009      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2010      CPUState *cs = CPU(cpu);
2011
2012     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2013         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2014         return 0;
2015     }
2016
2017     return 1;
2018 }
2019
2020 int kvmppc_get_hasidle(CPUPPCState *env)
2021 {
2022     struct kvm_ppc_pvinfo pvinfo;
2023
2024     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2025         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2026         return 1;
2027     }
2028
2029     return 0;
2030 }
2031
2032 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2033 {
2034     uint32_t *hc = (uint32_t*)buf;
2035     struct kvm_ppc_pvinfo pvinfo;
2036
2037     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2038         memcpy(buf, pvinfo.hcall, buf_len);
2039         return 0;
2040     }
2041
2042     /*
2043      * Fallback to always fail hypercalls regardless of endianness:
2044      *
2045      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2046      *     li r3, -1
2047      *     b .+8       (becomes nop in wrong endian)
2048      *     bswap32(li r3, -1)
2049      */
2050
2051     hc[0] = cpu_to_be32(0x08000048);
2052     hc[1] = cpu_to_be32(0x3860ffff);
2053     hc[2] = cpu_to_be32(0x48000008);
2054     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2055
2056     return 1;
2057 }
2058
2059 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2060 {
2061     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2062 }
2063
2064 void kvmppc_enable_logical_ci_hcalls(void)
2065 {
2066     /*
2067      * FIXME: it would be nice if we could detect the cases where
2068      * we're using a device which requires the in kernel
2069      * implementation of these hcalls, but the kernel lacks them and
2070      * produce a warning.
2071      */
2072     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2073     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2074 }
2075
2076 void kvmppc_enable_set_mode_hcall(void)
2077 {
2078     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2079 }
2080
2081 void kvmppc_enable_clear_ref_mod_hcalls(void)
2082 {
2083     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2084     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2085 }
2086
2087 void kvmppc_set_papr(PowerPCCPU *cpu)
2088 {
2089     CPUState *cs = CPU(cpu);
2090     int ret;
2091
2092     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2093     if (ret) {
2094         error_report("This vCPU type or KVM version does not support PAPR");
2095         exit(1);
2096     }
2097
2098     /* Update the capability flag so we sync the right information
2099      * with kvm */
2100     cap_papr = 1;
2101 }
2102
2103 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2104 {
2105     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2106 }
2107
2108 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2109 {
2110     CPUState *cs = CPU(cpu);
2111     int ret;
2112
2113     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2114     if (ret && mpic_proxy) {
2115         error_report("This KVM version does not support EPR");
2116         exit(1);
2117     }
2118 }
2119
2120 int kvmppc_smt_threads(void)
2121 {
2122     return cap_ppc_smt ? cap_ppc_smt : 1;
2123 }
2124
2125 #ifdef TARGET_PPC64
2126 off_t kvmppc_alloc_rma(void **rma)
2127 {
2128     off_t size;
2129     int fd;
2130     struct kvm_allocate_rma ret;
2131
2132     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2133      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2134      *                      not necessary on this hardware
2135      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2136      *
2137      * FIXME: We should allow the user to force contiguous RMA
2138      * allocation in the cap_ppc_rma==1 case.
2139      */
2140     if (cap_ppc_rma < 2) {
2141         return 0;
2142     }
2143
2144     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2145     if (fd < 0) {
2146         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2147                 strerror(errno));
2148         return -1;
2149     }
2150
2151     size = MIN(ret.rma_size, 256ul << 20);
2152
2153     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2154     if (*rma == MAP_FAILED) {
2155         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2156         return -1;
2157     };
2158
2159     return size;
2160 }
2161
2162 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2163 {
2164     struct kvm_ppc_smmu_info info;
2165     long rampagesize, best_page_shift;
2166     int i;
2167
2168     if (cap_ppc_rma >= 2) {
2169         return current_size;
2170     }
2171
2172     /* Find the largest hardware supported page size that's less than
2173      * or equal to the (logical) backing page size of guest RAM */
2174     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2175     rampagesize = qemu_getrampagesize();
2176     best_page_shift = 0;
2177
2178     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2179         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2180
2181         if (!sps->page_shift) {
2182             continue;
2183         }
2184
2185         if ((sps->page_shift > best_page_shift)
2186             && ((1UL << sps->page_shift) <= rampagesize)) {
2187             best_page_shift = sps->page_shift;
2188         }
2189     }
2190
2191     return MIN(current_size,
2192                1ULL << (best_page_shift + hash_shift - 7));
2193 }
2194 #endif
2195
2196 bool kvmppc_spapr_use_multitce(void)
2197 {
2198     return cap_spapr_multitce;
2199 }
2200
2201 int kvmppc_spapr_enable_inkernel_multitce(void)
2202 {
2203     int ret;
2204
2205     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2206                             H_PUT_TCE_INDIRECT, 1);
2207     if (!ret) {
2208         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2209                                 H_STUFF_TCE, 1);
2210     }
2211
2212     return ret;
2213 }
2214
2215 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2216                               uint64_t bus_offset, uint32_t nb_table,
2217                               int *pfd, bool need_vfio)
2218 {
2219     long len;
2220     int fd;
2221     void *table;
2222
2223     /* Must set fd to -1 so we don't try to munmap when called for
2224      * destroying the table, which the upper layers -will- do
2225      */
2226     *pfd = -1;
2227     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2228         return NULL;
2229     }
2230
2231     if (cap_spapr_tce_64) {
2232         struct kvm_create_spapr_tce_64 args = {
2233             .liobn = liobn,
2234             .page_shift = page_shift,
2235             .offset = bus_offset >> page_shift,
2236             .size = nb_table,
2237             .flags = 0
2238         };
2239         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2240         if (fd < 0) {
2241             fprintf(stderr,
2242                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2243                     liobn);
2244             return NULL;
2245         }
2246     } else if (cap_spapr_tce) {
2247         uint64_t window_size = (uint64_t) nb_table << page_shift;
2248         struct kvm_create_spapr_tce args = {
2249             .liobn = liobn,
2250             .window_size = window_size,
2251         };
2252         if ((window_size != args.window_size) || bus_offset) {
2253             return NULL;
2254         }
2255         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2256         if (fd < 0) {
2257             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2258                     liobn);
2259             return NULL;
2260         }
2261     } else {
2262         return NULL;
2263     }
2264
2265     len = nb_table * sizeof(uint64_t);
2266     /* FIXME: round this up to page size */
2267
2268     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2269     if (table == MAP_FAILED) {
2270         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2271                 liobn);
2272         close(fd);
2273         return NULL;
2274     }
2275
2276     *pfd = fd;
2277     return table;
2278 }
2279
/*
 * Unmap a TCE table previously mapped for @nb_table entries and close
 * its @fd.
 *
 * Returns -1 if @fd is negative (nothing is done), 0 otherwise.
 * Failures of munmap() or close() are reported on stderr but do not
 * change the return value.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if (munmap(table, len) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the mapping, but still release the fd below */
    }
    if (close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
    }

    return 0;
}
2298
2299 int kvmppc_reset_htab(int shift_hint)
2300 {
2301     uint32_t shift = shift_hint;
2302
2303     if (!kvm_enabled()) {
2304         /* Full emulation, tell caller to allocate htab itself */
2305         return 0;
2306     }
2307     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2308         int ret;
2309         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2310         if (ret == -ENOTTY) {
2311             /* At least some versions of PR KVM advertise the
2312              * capability, but don't implement the ioctl().  Oops.
2313              * Return 0 so that we allocate the htab in qemu, as is
2314              * correct for PR. */
2315             return 0;
2316         } else if (ret < 0) {
2317             return ret;
2318         }
2319         return shift;
2320     }
2321
2322     /* We have a kernel that predates the htab reset calls.  For PR
2323      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2324      * this era, it has allocated a 16MB fixed size hash table already. */
2325     if (kvmppc_is_pr(kvm_state)) {
2326         /* PR - tell caller to allocate htab */
2327         return 0;
2328     } else {
2329         /* HV - assume 16MB kernel allocated htab */
2330         return 24;
2331     }
2332 }
2333
2334 static inline uint32_t mfpvr(void)
2335 {
2336     uint32_t pvr;
2337
2338     asm ("mfpvr %0"
2339          : "=r"(pvr));
2340     return pvr;
2341 }
2342
/*
 * Set (when @on is true) or clear (when @on is false) the bits of
 * @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t updated = on ? (*word | flags) : (*word & ~flags);

    *word = updated;
}
2351
2352 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2353 {
2354     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2355     uint32_t vmx = kvmppc_get_vmx();
2356     uint32_t dfp = kvmppc_get_dfp();
2357     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2358     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2359
2360     /* Now fix up the class with information we can query from the host */
2361     pcc->pvr = mfpvr();
2362
2363     if (vmx != -1) {
2364         /* Only override when we know what the host supports */
2365         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2366         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2367     }
2368     if (dfp != -1) {
2369         /* Only override when we know what the host supports */
2370         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2371     }
2372
2373     if (dcache_size != -1) {
2374         pcc->l1_dcache_size = dcache_size;
2375     }
2376
2377     if (icache_size != -1) {
2378         pcc->l1_icache_size = icache_size;
2379     }
2380
2381 #if defined(TARGET_PPC64)
2382     pcc->radix_page_info = kvm_get_radix_page_info();
2383
2384     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2385         /*
2386          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2387          * compliant.  More importantly, advertising ISA 3.00
2388          * architected mode may prevent guests from activating
2389          * necessary DD1 workarounds.
2390          */
2391         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2392                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2393     }
2394 #endif /* defined(TARGET_PPC64) */
2395 }
2396
2397 bool kvmppc_has_cap_epr(void)
2398 {
2399     return cap_epr;
2400 }
2401
2402 bool kvmppc_has_cap_htab_fd(void)
2403 {
2404     return cap_htab_fd;
2405 }
2406
2407 bool kvmppc_has_cap_fixup_hcalls(void)
2408 {
2409     return cap_fixup_hcalls;
2410 }
2411
2412 bool kvmppc_has_cap_htm(void)
2413 {
2414     return cap_htm;
2415 }
2416
2417 bool kvmppc_has_cap_mmu_radix(void)
2418 {
2419     return cap_mmu_radix;
2420 }
2421
2422 bool kvmppc_has_cap_mmu_hash_v3(void)
2423 {
2424     return cap_mmu_hash_v3;
2425 }
2426
2427 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2428 {
2429     uint32_t host_pvr = mfpvr();
2430     PowerPCCPUClass *pvr_pcc;
2431
2432     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2433     if (pvr_pcc == NULL) {
2434         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2435     }
2436
2437     return pvr_pcc;
2438 }
2439
2440 static int kvm_ppc_register_host_cpu_type(void)
2441 {
2442     TypeInfo type_info = {
2443         .name = TYPE_HOST_POWERPC_CPU,
2444         .class_init = kvmppc_host_cpu_class_init,
2445     };
2446     PowerPCCPUClass *pvr_pcc;
2447     DeviceClass *dc;
2448     int i;
2449
2450     pvr_pcc = kvm_ppc_get_host_cpu_class();
2451     if (pvr_pcc == NULL) {
2452         return -1;
2453     }
2454     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2455     type_register(&type_info);
2456
2457 #if defined(TARGET_PPC64)
2458     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2459     type_info.parent = TYPE_SPAPR_CPU_CORE,
2460     type_info.instance_size = sizeof(sPAPRCPUCore);
2461     type_info.instance_init = NULL;
2462     type_info.class_init = spapr_cpu_core_class_init;
2463     type_info.class_data = (void *) "host";
2464     type_register(&type_info);
2465     g_free((void *)type_info.name);
2466 #endif
2467
2468     /*
2469      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2470      * we want "POWER8" to be a "family" alias that points to the current
2471      * host CPU type, too)
2472      */
2473     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2474     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2475         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2476             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2477             char *suffix;
2478
2479             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2480             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2481             if (suffix) {
2482                 *suffix = 0;
2483             }
2484             ppc_cpu_aliases[i].oc = oc;
2485             break;
2486         }
2487     }
2488
2489     return 0;
2490 }
2491
2492 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2493 {
2494     struct kvm_rtas_token_args args = {
2495         .token = token,
2496     };
2497
2498     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2499         return -ENOENT;
2500     }
2501
2502     strncpy(args.name, function, sizeof(args.name));
2503
2504     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2505 }
2506
2507 int kvmppc_get_htab_fd(bool write)
2508 {
2509     struct kvm_get_htab_fd s = {
2510         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2511         .start_index = 0,
2512     };
2513
2514     if (!cap_htab_fd) {
2515         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2516         return -1;
2517     }
2518
2519     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2520 }
2521
2522 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2523 {
2524     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2525     uint8_t buf[bufsize];
2526     ssize_t rc;
2527
2528     do {
2529         rc = read(fd, buf, bufsize);
2530         if (rc < 0) {
2531             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2532                     strerror(errno));
2533             return rc;
2534         } else if (rc) {
2535             uint8_t *buffer = buf;
2536             ssize_t n = rc;
2537             while (n) {
2538                 struct kvm_get_htab_header *head =
2539                     (struct kvm_get_htab_header *) buffer;
2540                 size_t chunksize = sizeof(*head) +
2541                      HASH_PTE_SIZE_64 * head->n_valid;
2542
2543                 qemu_put_be32(f, head->index);
2544                 qemu_put_be16(f, head->n_valid);
2545                 qemu_put_be16(f, head->n_invalid);
2546                 qemu_put_buffer(f, (void *)(head + 1),
2547                                 HASH_PTE_SIZE_64 * head->n_valid);
2548
2549                 buffer += chunksize;
2550                 n -= chunksize;
2551             }
2552         }
2553     } while ((rc != 0)
2554              && ((max_ns < 0)
2555                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2556
2557     return (rc == 0) ? 1 : 0;
2558 }
2559
2560 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2561                            uint16_t n_valid, uint16_t n_invalid)
2562 {
2563     struct kvm_get_htab_header *buf;
2564     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2565     ssize_t rc;
2566
2567     buf = alloca(chunksize);
2568     buf->index = index;
2569     buf->n_valid = n_valid;
2570     buf->n_invalid = n_invalid;
2571
2572     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2573
2574     rc = write(fd, buf, chunksize);
2575     if (rc < 0) {
2576         fprintf(stderr, "Error writing KVM hash table: %s\n",
2577                 strerror(errno));
2578         return rc;
2579     }
2580     if (rc != chunksize) {
2581         /* We should never get a short write on a single chunk */
2582         fprintf(stderr, "Short write, restoring KVM hash table\n");
2583         return -1;
2584     }
2585     return 0;
2586 }
2587
2588 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2589 {
2590     return true;
2591 }
2592
2593 void kvm_arch_init_irq_routing(KVMState *s)
2594 {
2595 }
2596
2597 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2598 {
2599     struct kvm_get_htab_fd ghf = {
2600         .flags = 0,
2601         .start_index = ptex,
2602     };
2603     int fd, rc;
2604     int i;
2605
2606     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2607     if (fd < 0) {
2608         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2609     }
2610
2611     i = 0;
2612     while (i < n) {
2613         struct kvm_get_htab_header *hdr;
2614         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2615         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2616
2617         rc = read(fd, buf, sizeof(buf));
2618         if (rc < 0) {
2619             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2620         }
2621
2622         hdr = (struct kvm_get_htab_header *)buf;
2623         while ((i < n) && ((char *)hdr < (buf + rc))) {
2624             int invalid = hdr->n_invalid;
2625
2626             if (hdr->index != (ptex + i)) {
2627                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2628                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2629             }
2630
2631             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2632             i += hdr->n_valid;
2633
2634             if ((n - i) < invalid) {
2635                 invalid = n - i;
2636             }
2637             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2638             i += hdr->n_invalid;
2639
2640             hdr = (struct kvm_get_htab_header *)
2641                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2642         }
2643     }
2644
2645     close(fd);
2646 }
2647
2648 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2649 {
2650     int fd, rc;
2651     struct kvm_get_htab_fd ghf;
2652     struct {
2653         struct kvm_get_htab_header hdr;
2654         uint64_t pte0;
2655         uint64_t pte1;
2656     } buf;
2657
2658     ghf.flags = 0;
2659     ghf.start_index = 0;     /* Ignored */
2660     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2661     if (fd < 0) {
2662         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2663     }
2664
2665     buf.hdr.n_valid = 1;
2666     buf.hdr.n_invalid = 0;
2667     buf.hdr.index = ptex;
2668     buf.pte0 = cpu_to_be64(pte0);
2669     buf.pte1 = cpu_to_be64(pte1);
2670
2671     rc = write(fd, &buf, sizeof(buf));
2672     if (rc != sizeof(buf)) {
2673         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2674     }
2675     close(fd);
2676 }
2677
2678 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2679                              uint64_t address, uint32_t data, PCIDevice *dev)
2680 {
2681     return 0;
2682 }
2683
2684 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2685                                 int vector, PCIDevice *dev)
2686 {
2687     return 0;
2688 }
2689
/* Nothing is done here: @virq is ignored and 0 is returned. */
int kvm_arch_release_virq_post(int virq)
{
    const int status = 0;

    return status;
}
2694
/* Return the low 16 bits of @data. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t low16 = data & 0xffff;

    return low16;
}
2699
2700 int kvmppc_enable_hwrng(void)
2701 {
2702     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2703         return -1;
2704     }
2705
2706     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2707 }